Index: head/contrib/elftoolchain/libelf/gelf_mips64el.c =================================================================== --- head/contrib/elftoolchain/libelf/gelf_mips64el.c (revision 344854) +++ head/contrib/elftoolchain/libelf/gelf_mips64el.c (revision 344855) @@ -1,82 +1,81 @@ /*- * Copyright (c) 2018 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "_libelf.h" ELFTC_VCSID("$Id$"); int _libelf_is_mips64el(Elf *e) { return (e->e_kind == ELF_K_ELF && e->e_class == ELFCLASS64 && e->e_u.e_elf.e_ehdr.e_ehdr64->e_machine == EM_MIPS && e->e_u.e_elf.e_ehdr.e_ehdr64->e_ident[EI_DATA] == ELFDATA2LSB); } /* * For MIPS64, the r_info field is actually stored as a 32-bit symbol * index (r_sym) followed by four single-byte fields (r_ssym, r_type3, * r_type2, and r_type). The byte-swap for the little-endian case * jumbles this incorrectly so compensate. */ Elf64_Xword _libelf_mips64el_r_info_tof(Elf64_Xword r_info) { Elf64_Xword new_info; uint8_t ssym, type3, type2, type; ssym = r_info >> 24; type3 = r_info >> 16; type2 = r_info >> 8; type = r_info; new_info = r_info >> 32; new_info |= (Elf64_Xword)ssym << 32; new_info |= (Elf64_Xword)type3 << 40; new_info |= (Elf64_Xword)type2 << 48; new_info |= (Elf64_Xword)type << 56; return (new_info); } Elf64_Xword _libelf_mips64el_r_info_tom(Elf64_Xword r_info) { Elf64_Xword new_info; uint8_t ssym, type3, type2, type; ssym = r_info >> 32; type3 = r_info >> 40; type2 = r_info >> 48; type = r_info >> 56; new_info = (r_info & 0xffffffff) << 32; new_info |= (Elf64_Xword)ssym << 24; new_info |= (Elf64_Xword)type3 << 16; new_info |= (Elf64_Xword)type2 << 8; new_info |= (Elf64_Xword)type; return (new_info); } Index: head/lib/libc/tests/gen/makecontext_test.c =================================================================== --- head/lib/libc/tests/gen/makecontext_test.c (revision 344854) +++ head/lib/libc/tests/gen/makecontext_test.c (revision 344855) @@ -1,189 +1,188 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
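[Illustrative aside, not part of this commit.] The jumbled r_info layout above is easiest to see with concrete bytes. The sketch below reimplements the two helpers under local names and checks a round-trip for r_sym 0x12345678, r_ssym 0xaa, r_type3 0xbb, r_type2 0xcc, r_type 0xdd; the "file" value is the 64-bit slot read as a little-endian integer, the "memory" value is the GElf layout with r_sym in the high 32 bits.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
r_info_tom(uint64_t r_info)     /* raw little-endian value -> GElf layout */
{
        uint8_t ssym = r_info >> 32, type3 = r_info >> 40;
        uint8_t type2 = r_info >> 48, type = r_info >> 56;
        uint64_t new_info = (r_info & 0xffffffff) << 32; /* r_sym up top */

        new_info |= (uint64_t)ssym << 24;
        new_info |= (uint64_t)type3 << 16;
        new_info |= (uint64_t)type2 << 8;
        new_info |= (uint64_t)type;
        return (new_info);
}

static uint64_t
r_info_tof(uint64_t r_info)     /* GElf layout -> raw little-endian value */
{
        uint8_t ssym = r_info >> 24, type3 = r_info >> 16;
        uint8_t type2 = r_info >> 8, type = r_info;
        uint64_t new_info = r_info >> 32;

        new_info |= (uint64_t)ssym << 32;
        new_info |= (uint64_t)type3 << 40;
        new_info |= (uint64_t)type2 << 48;
        new_info |= (uint64_t)type << 56;
        return (new_info);
}

int
main(void)
{
        uint64_t file = 0xddccbbaa12345678ULL;
        uint64_t mem = r_info_tom(file);

        assert(mem == 0x12345678aabbccddULL);   /* r_sym in bits 63:32 */
        assert(r_info_tof(mem) == file);        /* exact round-trip */
        printf("mem r_info: %#jx\n", (uintmax_t)mem);
        return (0);
}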
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include static char uc_stack[16 * 1024]; static void check_1(int arg1) { ATF_REQUIRE_EQ(arg1, 1); } ATF_TC_WITHOUT_HEAD(makecontext_arg1); ATF_TC_BODY(makecontext_arg1, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_1, 1, 1); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_2(int arg1, int arg2) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); } ATF_TC_WITHOUT_HEAD(makecontext_arg2); ATF_TC_BODY(makecontext_arg2, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_2, 2, 1, 2); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_3(int arg1, int arg2, int arg3) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); } ATF_TC_WITHOUT_HEAD(makecontext_arg3); ATF_TC_BODY(makecontext_arg3, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_3, 3, 1, 2, 3); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_4(int arg1, int arg2, int arg3, int arg4) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); ATF_REQUIRE_EQ(arg4, 4); } ATF_TC_WITHOUT_HEAD(makecontext_arg4); ATF_TC_BODY(makecontext_arg4, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_4, 4, 1, 2, 3, 4); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_5(int arg1, int arg2, int arg3, int arg4, int arg5) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); ATF_REQUIRE_EQ(arg4, 4); ATF_REQUIRE_EQ(arg5, 5); } ATF_TC_WITHOUT_HEAD(makecontext_arg5); ATF_TC_BODY(makecontext_arg5, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_5, 5, 1, 2, 3, 4, 5); 
ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_6(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); ATF_REQUIRE_EQ(arg4, 4); ATF_REQUIRE_EQ(arg5, 5); ATF_REQUIRE_EQ(arg6, 6); } ATF_TC_WITHOUT_HEAD(makecontext_arg6); ATF_TC_BODY(makecontext_arg6, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_6, 6, 1, 2, 3, 4, 5, 6); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, makecontext_arg1); ATF_TP_ADD_TC(tp, makecontext_arg2); ATF_TP_ADD_TC(tp, makecontext_arg3); ATF_TP_ADD_TC(tp, makecontext_arg4); ATF_TP_ADD_TC(tp, makecontext_arg5); ATF_TP_ADD_TC(tp, makecontext_arg6); return (atf_no_error()); } Index: head/lib/libdevctl/devctl.3 =================================================================== --- head/lib/libdevctl/devctl.3 (revision 344854) +++ head/lib/libdevctl/devctl.3 (revision 344855) @@ -1,394 +1,393 @@ .\" .\" Copyright (c) 2014 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
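[Illustrative aside, not part of this commit.] Every test above follows one pattern; reduced to a minimal standalone program it looks like this. The uc_link member is what makes swapcontext() return to main once the target function falls off the end, and the function-pointer cast matches the tests.

#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static char stack[16 * 1024];
static ucontext_t main_ctx, func_ctx;

static void
greet(int a, int b)
{
        printf("in context: a=%d b=%d\n", a, b);
        /* Returning resumes uc_link, i.e. main_ctx. */
}

int
main(void)
{
        if (getcontext(&func_ctx) != 0)
                abort();
        func_ctx.uc_stack.ss_sp = stack;
        func_ctx.uc_stack.ss_size = sizeof(stack);
        func_ctx.uc_link = &main_ctx;
        /* makecontext() passes int arguments after the count. */
        makecontext(&func_ctx, (void (*)(void))greet, 2, 1, 2);
        if (swapcontext(&main_ctx, &func_ctx) != 0)
                abort();
        printf("back in main\n");
        return (0);
}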
.\" .\" $FreeBSD$ .\" .Dd August 22, 2018 .Dt DEVCTL 3 .Os .Sh NAME .Nm devctl , .Nm devctl_attach , .Nm devctl_clear_driver , .Nm devctl_delete , .Nm devctl_detach , .Nm devctl_disable , .Nm devctl_enable , .Nm devctl_freeze , .Nm devctl_rescan , .Nm devctl_resume , .Nm devctl_set_driver , .Nm devctl_suspend , .Nm devctl_thaw .Nd device control library .Sh LIBRARY .Lb libdevctl .Sh SYNOPSIS .In devctl.h .Ft int .Fn devctl_attach "const char *device" .Ft int .Fn devctl_clear_driver "const char *device" "bool force" .Ft int .Fn devctl_delete "const char *device" "bool force" .Ft int .Fn devctl_detach "const char *device" "bool force" .Ft int .Fn devctl_disable "const char *device" "bool force_detach" .Ft int .Fn devctl_enable "const char *device" .Ft int .Fn devctl_freeze "void" .Ft int .Fn devctl_rescan "const char *device" .Ft int .Fn devctl_resume "const char *device" .Ft int .Fn devctl_set_driver "const char *device" "const char *driver" "bool force" .Ft int .Fn devctl_suspend "const char *device" .Ft int .Fn devctl_thaw "void" .Sh DESCRIPTION The .Nm library adjusts the state of devices in the kernel's internal device hierarchy. Each control operation accepts a .Fa device argument that identifies the device to adjust. The .Fa device may be specified as either the name of an existing device or as a bus-specific address. The following bus-specific address formats are currently supported: .Bl -tag -offset indent .It Sy pci Ns Fa domain Ns : Ns Fa bus Ns : Ns Fa slot Ns : Ns Fa function A PCI device with the specified .Fa domain , .Fa bus , .Fa slot , and .Fa function . .It Sy pci Ns Fa bus Ns : Ns Fa slot Ns : Ns Fa function A PCI device in domain zero with the specified .Fa bus , .Fa slot , and .Fa function . .It Fa handle A device with an ACPI handle of .Fa handle . The handle must be specified as an absolute path and must begin with a .Dq \e . .El .Pp The .Fn devctl_attach function probes a device and attaches a suitable device driver if one is found. .Pp The .Fn devctl_detach function detaches a device from its current device driver. The device is left detached until either a new driver for its parent bus is loaded or the device is explicitly probed via .Fn devctl_attach . If .Fa force is true, the current device driver will be detached even if the device is busy. .Pp The .Fn devctl_delete function deletes a device from the device tree. No If .Fa force is true, the device is deleted even if the device is physically present. .Pp The .Fn devctl_disable function disables a device. If the device is currently attached to a device driver, the device driver will be detached from the device, but the device will retain its current name. If .Fa force_detach is true, the current device driver will be detached even if the device is busy. The device will remain disabled and detached until it is explicitly enabled via .Fn devctl_enable . .Pp The .Fn devctl_enable function re-enables a disabled device. The device will probe and attach if a suitable device driver is found. .Pp The .Fn devctl_suspend function suspends a device. This may include placing the device in a reduced power state, but any device driver currently attached to the device will remain attached. .Pp The .Fn devctl_resume function resumes a suspended device to a fully working state. .Pp The .Fn devctl_set_driver function attaches a device driver named .Fa driver to a device. If the device is already attached and .Fa force is false, the request will fail. 
If the device is already attached and .Fa force is true, the device will be detached from its current device driver before it is attached to the new device driver. .Pp The .Fn devctl_clear_driver function resets a device so that it can be attached to any valid device driver rather than only drivers with a previously specified name. This function is used to undo a previous call to .Fn devctl_set_driver . If the device is already attached and .Fa force is false, the request will fail. If the device is already attached and .Fa force is true, the device will be detached from its current device driver. After the device's name is reset, it is reprobed and attached to a suitable device driver if one is found. .Pp The .Fn devctl_rescan function rescans a bus device checking for devices that have been added or removed. .Pp The .Fn devctl_freeze function freezes probe and attach processing initiated in response to drivers being loaded. .Pp The .Fn devctl_thaw function resumes (thaws the freeze) probe and attach processing initiated in response to drivers being loaded. .Sh RETURN VALUES .Rv -std devctl_attach devctl_clear_driver devctl_delete devctl_detach \ devctl_disable devctl_enable devctl_suspend devctl_rescan devctl_resume \ devctl_set_driver .Sh ERRORS In addition to specific errors noted below, all of the .Nm functions may fail for any of the errors described in .Xr open 2 as well as: .Bl -tag -width Er .It Bq Er EINVAL The device name is too long. .It Bq Er ENOENT No existing device matches the specified name or location. .It Bq Er EPERM The current process is not permitted to adjust the state of .Fa device . .El .Pp The .Fn devctl_attach function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is already attached. .It Bq Er ENOMEM An internal memory allocation request failed. .It Bq Er ENXIO The device is disabled. .It Bq Er ENXIO No suitable driver for the device could be found, or the driver failed to attach. .El .Pp The .Fn devctl_detach function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. Note that some drivers may return this even if .Fa force is true. .It Bq Er ENXIO The device is not attached to a driver. .It Bq Er ENXIO The current device driver for .Fa device does not support detaching. .El .Pp The .Fn devctl_enable function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is already enabled. .It Bq Er ENOMEM An internal memory allocation request failed. .It Bq Er ENXIO No suitable driver for the device could be found, or the driver failed to attach. .El .Pp The .Fn devctl_disable function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. Note that some drivers may return this even if .Fa force_detach is true. .It Bq Er ENXIO The device is already disabled. .It Bq Er ENXIO The current device driver for .Fa device does not support detaching. .El .Pp The .Fn devctl_suspend function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is already suspended. .It Bq Er EINVAL The device to be suspended is the root bus device. .El .Pp The .Fn devctl_resume function may fail if: .Bl -tag -width Er .It Bq Er EINVAL The device is not suspended. .It Bq Er EINVAL The device to be resumed is the root bus device. .El .Pp The .Fn devctl_set_driver function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is currently attached to a device driver and .Fa force is false. 
.It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. .It Bq Er EFAULT The .Fa driver argument points outside the process' allocated address space. .It Bq Er ENOENT No device driver with the requested name exists. .It Bq Er ENOMEM An internal memory allocation request failed. .It Bq Er ENXIO The device is disabled. .It Bq Er ENXIO The new device driver failed to attach. .El .Pp The .Fn devctl_clear_driver function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is currently attached to a device driver and .Fa force is false. .It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. .It Bq Er EINVAL The device is not configured for a specific device driver name. .It Bq Er ENXIO The device driver chosen after reprobing failed to attach. .El .Pp The .Fn devctl_rescan function may fail if: .Bl -tag -width Er .It Bq Er ENXIO The device is not attached to a driver. .It Bq Er ENXIO The bus driver does not support rescanning. .El .Pp The .Fn devctl_delete function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is physically present and .Fa force is false. .It Bq Er EINVAL .Fa dev is the root device of the device tree. .El .Sh SEE ALSO .Xr devinfo 3 , .Xr devstat 3 , .Xr devctl 8 .Sh HISTORY The .Nm library first appeared in .Fx 10.3 . .Sh BUGS If a device is suspended individually via .Fn devctl_suspend and the entire machine is subsequently suspended, the device will be resumed when the machine resumes. Index: head/lib/libdevctl/devctl.c =================================================================== --- head/lib/libdevctl/devctl.c (revision 344854) +++ head/lib/libdevctl/devctl.c (revision 344855) @@ -1,161 +1,160 @@ /*- * Copyright (c) 2014 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
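[Illustrative aside, not part of this commit.] A sketch of how the interface documented above might be driven from a program, following the ERRORS section for the EBUSY fallback; the PCI address "pci0:2:0:0" is a made-up example. Link with -ldevctl.

#include <err.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#include <devctl.h>

int
main(void)
{
        const char *dev = "pci0:2:0:0";     /* hypothetical bus address */

        if (devctl_detach(dev, false) != 0) {
                if (errno != EBUSY)
                        err(1, "detach of %s", dev);
                /* Busy driver: force the detach, as the man page allows. */
                if (devctl_detach(dev, true) != 0)
                        err(1, "forced detach of %s", dev);
        }
        /* Reprobe: attaches a suitable driver if one is found. */
        if (devctl_attach(dev) != 0)
                err(1, "attach of %s", dev);
        printf("%s reprobed\n", dev);
        return (0);
}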
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "devctl.h" static int devctl_request(u_long cmd, struct devreq *req) { static int devctl2_fd = -1; if (devctl2_fd == -1) { devctl2_fd = open("/dev/devctl2", O_RDONLY); if (devctl2_fd == -1) return (-1); } return (ioctl(devctl2_fd, cmd, req)); } static int devctl_simple_request(u_long cmd, const char *name, int flags) { struct devreq req; memset(&req, 0, sizeof(req)); if (strlcpy(req.dr_name, name, sizeof(req.dr_name)) >= sizeof(req.dr_name)) { errno = EINVAL; return (-1); } req.dr_flags = flags; return (devctl_request(cmd, &req)); } int devctl_attach(const char *device) { return (devctl_simple_request(DEV_ATTACH, device, 0)); } int devctl_detach(const char *device, bool force) { return (devctl_simple_request(DEV_DETACH, device, force ? DEVF_FORCE_DETACH : 0)); } int devctl_enable(const char *device) { return (devctl_simple_request(DEV_ENABLE, device, 0)); } int devctl_disable(const char *device, bool force_detach) { return (devctl_simple_request(DEV_DISABLE, device, force_detach ? DEVF_FORCE_DETACH : 0)); } int devctl_suspend(const char *device) { return (devctl_simple_request(DEV_SUSPEND, device, 0)); } int devctl_resume(const char *device) { return (devctl_simple_request(DEV_RESUME, device, 0)); } int devctl_set_driver(const char *device, const char *driver, bool force) { struct devreq req; memset(&req, 0, sizeof(req)); if (strlcpy(req.dr_name, device, sizeof(req.dr_name)) >= sizeof(req.dr_name)) { errno = EINVAL; return (-1); } req.dr_data = __DECONST(char *, driver); if (force) req.dr_flags |= DEVF_SET_DRIVER_DETACH; return (devctl_request(DEV_SET_DRIVER, &req)); } int devctl_clear_driver(const char *device, bool force) { return (devctl_simple_request(DEV_CLEAR_DRIVER, device, force ? DEVF_CLEAR_DRIVER_DETACH : 0)); } int devctl_rescan(const char *device) { return (devctl_simple_request(DEV_RESCAN, device, 0)); } int devctl_delete(const char *device, bool force) { return (devctl_simple_request(DEV_DELETE, device, force ? DEVF_FORCE_DELETE : 0)); } int devctl_freeze(void) { return (devctl_simple_request(DEV_FREEZE, "", 0)); } int devctl_thaw(void) { return (devctl_simple_request(DEV_THAW, "", 0)); } Index: head/lib/libdevctl/devctl.h =================================================================== --- head/lib/libdevctl/devctl.h (revision 344854) +++ head/lib/libdevctl/devctl.h (revision 344855) @@ -1,47 +1,46 @@ /*- * Copyright (c) 2014 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __DEVCTL_H__ #define __DEVCTL_H__ #include int devctl_attach(const char *device); int devctl_detach(const char *device, bool force); int devctl_enable(const char *device); int devctl_disable(const char *device, bool force_detach); int devctl_suspend(const char *device); int devctl_resume(const char *device); int devctl_set_driver(const char *device, const char *driver, bool force); int devctl_clear_driver(const char *device, bool force); int devctl_rescan(const char *device); int devctl_delete(const char *device, bool force); int devctl_freeze(void); int devctl_thaw(void); #endif /* !__DEVCTL_H__ */ Index: head/lib/libkvm/kvm_aarch64.h =================================================================== --- head/lib/libkvm/kvm_aarch64.h (revision 344854) +++ head/lib/libkvm/kvm_aarch64.h (revision 344855) @@ -1,67 +1,66 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
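[Illustrative aside, not part of this commit.] For reference, the wrappers in devctl.c above all reduce to an ioctl on /dev/devctl2; a minimal equivalent of devctl_enable(), assuming struct devreq and DEV_ENABLE from sys/bus.h as the library uses them:

#include <sys/types.h>
#include <sys/bus.h>            /* struct devreq, DEV_ENABLE */
#include <sys/ioctl.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(int argc, char **argv)
{
        struct devreq req;
        int fd;

        if (argc != 2)
                errx(1, "usage: enable device");
        fd = open("/dev/devctl2", O_RDONLY);
        if (fd == -1)
                err(1, "open(/dev/devctl2)");
        memset(&req, 0, sizeof(req));
        if (strlcpy(req.dr_name, argv[1], sizeof(req.dr_name)) >=
            sizeof(req.dr_name))
                errx(1, "device name too long");
        if (ioctl(fd, DEV_ENABLE, &req) == -1)
                err(1, "DEV_ENABLE");
        printf("%s enabled\n", argv[1]);
        return (0);
}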
* * $FreeBSD$ */ #ifndef __KVM_AARCH64_H__ #define __KVM_AARCH64_H__ #ifdef __aarch64__ #include #endif typedef uint64_t aarch64_physaddr_t; typedef uint64_t aarch64_pte_t; #define AARCH64_PAGE_SHIFT 12 #define AARCH64_PAGE_SIZE (1 << AARCH64_PAGE_SHIFT) #define AARCH64_PAGE_MASK (AARCH64_PAGE_SIZE - 1) /* Source: arm64/include/pte.h */ #define AARCH64_ATTR_MASK 0xfff0000000000fff #define AARCH64_ATTR_UXN (1ULL << 54) #define AARCH64_ATTR_PXN (1ULL << 53) #define AARCH64_ATTR_XN (AARCH64_ATTR_PXN | AARCH64_ATTR_UXN) #define AARCH64_ATTR_AP(x) ((x) << 6) #define AARCH64_ATTR_AP_RO (1 << 1) #define AARCH64_ATTR_DESCR_MASK 3 #define AARCH64_L3_SHIFT 12 #define AARCH64_L3_PAGE 0x3 #ifdef __aarch64__ _Static_assert(PAGE_SHIFT == AARCH64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == AARCH64_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == AARCH64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(ATTR_MASK == AARCH64_ATTR_MASK, "ATTR_MASK mismatch"); _Static_assert(ATTR_DESCR_MASK == AARCH64_ATTR_DESCR_MASK, "ATTR_DESCR_MASK mismatch"); _Static_assert(L3_SHIFT == AARCH64_L3_SHIFT, "L3_SHIFT mismatch"); _Static_assert(L3_PAGE == AARCH64_L3_PAGE, "L3_PAGE mismatch"); #endif #endif /* !__KVM_AARCH64_H__ */ Index: head/lib/libkvm/kvm_amd64.h =================================================================== --- head/lib/libkvm/kvm_amd64.h (revision 344854) +++ head/lib/libkvm/kvm_amd64.h (revision 344855) @@ -1,90 +1,89 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
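[Illustrative aside, not part of this commit.] A worked example of the aarch64 constants above, with the AARCH64_-prefixed values inlined and a hypothetical level-3 entry: clearing the attribute bits leaves the output (physical) address.

#include <stdint.h>
#include <stdio.h>

#define ATTR_MASK   0xfff0000000000fffULL       /* AARCH64_ATTR_MASK */
#define DESCR_MASK  3                           /* AARCH64_ATTR_DESCR_MASK */
#define L3_PAGE     0x3                         /* AARCH64_L3_PAGE */
#define XN          ((1ULL << 53) | (1ULL << 54)) /* AARCH64_ATTR_XN */
#define AP_RO       ((uint64_t)(1 << 1) << 6)   /* AARCH64_ATTR_AP(AP_RO) */

int
main(void)
{
        uint64_t pte = 0x0040000089abc7c3ULL;   /* hypothetical L3 entry */

        if ((pte & DESCR_MASK) != L3_PAGE) {
                printf("not a mapped 4K page\n");
                return (1);
        }
        printf("pa %#jx\n", (uintmax_t)(pte & ~ATTR_MASK)); /* 0x89abc000 */
        printf("ro %d\n", (pte & AP_RO) != 0);
        printf("xn %d\n", (pte & XN) != 0);
        return (0);
}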
* * $FreeBSD$ */ #ifndef __KVM_AMD64_H__ #define __KVM_AMD64_H__ #ifdef __amd64__ #include #include #endif typedef uint64_t amd64_physaddr_t; typedef uint64_t amd64_pte_t; typedef uint64_t amd64_pde_t; typedef uint64_t amd64_pdpe_t; typedef uint64_t amd64_pml4e_t; #define AMD64_NPTEPG (AMD64_PAGE_SIZE / sizeof(amd64_pte_t)) #define AMD64_PAGE_SHIFT 12 #define AMD64_PAGE_SIZE (1 << AMD64_PAGE_SHIFT) #define AMD64_PAGE_MASK (AMD64_PAGE_SIZE - 1) #define AMD64_NPDEPG (AMD64_PAGE_SIZE / sizeof(amd64_pde_t)) #define AMD64_PDRSHIFT 21 #define AMD64_NBPDR (1 << AMD64_PDRSHIFT) #define AMD64_PDRMASK (AMD64_NBPDR - 1) #define AMD64_NPDPEPG (AMD64_PAGE_SIZE / sizeof(amd64_pdpe_t)) #define AMD64_PDPSHIFT 30 #define AMD64_NBPDP (1 << AMD64_PDPSHIFT) #define AMD64_PDPMASK (AMD64_NBPDP - 1) #define AMD64_NPML4EPG (AMD64_PAGE_SIZE / sizeof(amd64_pml4e_t)) #define AMD64_PML4SHIFT 39 #define AMD64_PG_NX (1ULL << 63) #define AMD64_PG_V 0x001 #define AMD64_PG_RW 0x002 #define AMD64_PG_PS 0x080 #define AMD64_PG_FRAME (0x000ffffffffff000) #define AMD64_PG_PS_FRAME (0x000fffffffe00000) #define AMD64_PG_1GB_FRAME (0x000fffffc0000000) #ifdef __amd64__ _Static_assert(NPTEPG == AMD64_NPTEPG, "NPTEPG mismatch"); _Static_assert(PAGE_SHIFT == AMD64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == AMD64_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == AMD64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(NPDEPG == AMD64_NPDEPG, "NPDEPG mismatch"); _Static_assert(PDRSHIFT == AMD64_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(NBPDR == AMD64_NBPDR, "NBPDR mismatch"); _Static_assert(PDRMASK == AMD64_PDRMASK, "PDRMASK mismatch"); _Static_assert(NPDPEPG == AMD64_NPDPEPG, "NPDPEPG mismatch"); _Static_assert(PDPSHIFT == AMD64_PDPSHIFT, "PDPSHIFT mismatch"); _Static_assert(NBPDP == AMD64_NBPDP, "NBPDP mismatch"); _Static_assert(PDPMASK == AMD64_PDPMASK, "PDPMASK mismatch"); _Static_assert(NPML4EPG == AMD64_NPML4EPG, "NPML4EPG mismatch"); _Static_assert(PML4SHIFT == AMD64_PML4SHIFT, "PML4SHIFT mismatch"); _Static_assert(PG_V == AMD64_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == AMD64_PG_PS, "PG_PS mismatch"); _Static_assert(PG_FRAME == AMD64_PG_FRAME, "PG_FRAME mismatch"); _Static_assert(PG_PS_FRAME == AMD64_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif int _amd64_native(kvm_t *); #endif /* !__KVM_AMD64_H__ */ Index: head/lib/libkvm/kvm_arm.h =================================================================== --- head/lib/libkvm/kvm_arm.h (revision 344854) +++ head/lib/libkvm/kvm_arm.h (revision 344855) @@ -1,126 +1,125 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
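[Illustrative aside, not part of this commit.] The amd64 shifts above encode the classic four-level layout; as plain arithmetic, a virtual address decomposes into four 9-bit table indexes (512 eight-byte entries per 4K page) plus a 12-bit page offset.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t va = 0xffffffff80201234ULL;    /* arbitrary sample address */

        printf("pml4 index %ju\n", (uintmax_t)((va >> 39) & 0x1ff));
        printf("pdp  index %ju\n", (uintmax_t)((va >> 30) & 0x1ff));
        printf("pd   index %ju\n", (uintmax_t)((va >> 21) & 0x1ff));
        printf("pt   index %ju\n", (uintmax_t)((va >> 12) & 0x1ff));
        printf("page offset %#jx\n", (uintmax_t)(va & 0xfff));
        return (0);
}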
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __KVM_ARM_H__ #define __KVM_ARM_H__ typedef uint32_t arm_physaddr_t; typedef uint32_t arm_pd_entry_t; typedef uint32_t arm_pt_entry_t; #define ARM_PAGE_SHIFT 12 #define ARM_PAGE_SIZE (1 << ARM_PAGE_SHIFT) /* Page size */ #define ARM_PAGE_MASK (ARM_PAGE_SIZE - 1) #define ARM_L1_TABLE_SIZE 0x4000 /* 16K */ #define ARM_L1_S_SIZE 0x00100000 /* 1M */ #define ARM_L1_S_OFFSET (ARM_L1_S_SIZE - 1) #define ARM_L1_S_FRAME (~ARM_L1_S_OFFSET) #define ARM_L1_S_SHIFT 20 #define ARM_L2_L_SIZE 0x00010000 /* 64K */ #define ARM_L2_L_OFFSET (ARM_L2_L_SIZE - 1) #define ARM_L2_L_FRAME (~ARM_L2_L_OFFSET) #define ARM_L2_L_SHIFT 16 #define ARM_L2_S_SIZE 0x00001000 /* 4K */ #define ARM_L2_S_OFFSET (ARM_L2_S_SIZE - 1) #define ARM_L2_S_FRAME (~ARM_L2_S_OFFSET) #define ARM_L2_S_SHIFT 12 #define ARM_L2_TEX1 0x00000080 #define ARM_PTE2_RO ARM_L2_TEX1 #define ARM_L2_NX 0x00000001 #define ARM_PTE2_NX ARM_L2_NX /* * Note: L2_S_PROT_W differs depending on whether the system is generic or * xscale. This isn't easily accessible in this context, so use an * approximation of 'xscale' which is a subset of 'generic'. */ #define ARM_L2_AP0(x) ((x) << 4) #define ARM_AP_W 0x01 #define ARM_L2_S_PROT_W (ARM_L2_AP0(ARM_AP_W)) #define ARM_L1_TYPE_INV 0x00 /* Invalid (fault) */ #define ARM_L1_TYPE_C 0x01 /* Coarse L2 */ #define ARM_L1_TYPE_S 0x02 /* Section */ #define ARM_L1_TYPE_MASK 0x03 /* Mask of type bits */ #define ARM_L1_S_ADDR_MASK 0xfff00000 /* phys address of section */ #define ARM_L1_C_ADDR_MASK 0xfffffc00 /* phys address of L2 Table */ #define ARM_L2_TYPE_INV 0x00 /* Invalid (fault) */ #define ARM_L2_TYPE_L 0x01 /* Large Page - 64k */ #define ARM_L2_TYPE_S 0x02 /* Small Page - 4k */ #define ARM_L2_TYPE_T 0x03 /* Tiny Page - 1k - not used */ #define ARM_L2_TYPE_MASK 0x03 #ifdef __arm__ #include #if __ARM_ARCH >= 6 #include #else #include #endif _Static_assert(PAGE_SHIFT == ARM_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == ARM_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == ARM_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(L1_TABLE_SIZE == ARM_L1_TABLE_SIZE, "L1_TABLE_SIZE mismatch"); _Static_assert(L1_S_SIZE == ARM_L1_S_SIZE, "L1_S_SIZE mismatch"); _Static_assert(L1_S_OFFSET == ARM_L1_S_OFFSET, "L1_S_OFFSET mismatch"); _Static_assert(L1_S_FRAME == ARM_L1_S_FRAME, "L1_S_FRAME mismatch"); _Static_assert(L1_S_SHIFT == ARM_L1_S_SHIFT, "L1_S_SHIFT mismatch"); _Static_assert(L2_L_SIZE == ARM_L2_L_SIZE, "L2_L_SIZE mismatch"); _Static_assert(L2_L_OFFSET == ARM_L2_L_OFFSET, "L2_L_OFFSET mismatch"); _Static_assert(L2_L_FRAME == ARM_L2_L_FRAME, "L2_L_FRAME mismatch"); _Static_assert(L2_L_SHIFT == ARM_L2_L_SHIFT, "L2_L_SHIFT mismatch"); _Static_assert(L2_S_SIZE == ARM_L2_S_SIZE, "L2_S_SIZE mismatch"); _Static_assert(L2_S_OFFSET == ARM_L2_S_OFFSET, "L2_S_OFFSET mismatch"); _Static_assert(L2_S_FRAME == ARM_L2_S_FRAME, "L2_S_FRAME mismatch"); _Static_assert(L2_S_SHIFT == ARM_L2_S_SHIFT, "L2_S_SHIFT mismatch"); _Static_assert(L1_TYPE_INV == ARM_L1_TYPE_INV, "L1_TYPE_INV 
mismatch"); _Static_assert(L1_TYPE_C == ARM_L1_TYPE_C, "L1_TYPE_C mismatch"); _Static_assert(L1_TYPE_S == ARM_L1_TYPE_S, "L1_TYPE_S mismatch"); _Static_assert(L1_TYPE_MASK == ARM_L1_TYPE_MASK, "L1_TYPE_MASK mismatch"); _Static_assert(L1_S_ADDR_MASK == ARM_L1_S_ADDR_MASK, "L1_S_ADDR_MASK mismatch"); _Static_assert(L1_C_ADDR_MASK == ARM_L1_C_ADDR_MASK, "L1_C_ADDR_MASK mismatch"); _Static_assert(L2_TYPE_INV == ARM_L2_TYPE_INV, "L2_TYPE_INV mismatch"); _Static_assert(L2_TYPE_L == ARM_L2_TYPE_L, "L2_TYPE_L mismatch"); _Static_assert(L2_TYPE_S == ARM_L2_TYPE_S, "L2_TYPE_S mismatch"); #if __ARM_ARCH < 6 _Static_assert(L2_TYPE_T == ARM_L2_TYPE_T, "L2_TYPE_T mismatch"); #endif _Static_assert(L2_TYPE_MASK == ARM_L2_TYPE_MASK, "L2_TYPE_MASK mismatch"); #endif int _arm_native(kvm_t *); #endif /* !__KVM_ARM_H__ */ Index: head/lib/libkvm/kvm_i386.h =================================================================== --- head/lib/libkvm/kvm_i386.h (revision 344854) +++ head/lib/libkvm/kvm_i386.h (revision 344855) @@ -1,84 +1,83 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __KVM_I386_H__ #define __KVM_I386_H__ #ifdef __i386__ #include #include #endif typedef uint32_t i386_physaddr_t; typedef uint32_t i386_pte_t; typedef uint32_t i386_pde_t; typedef uint64_t i386_physaddr_pae_t; typedef uint64_t i386_pte_pae_t; typedef uint64_t i386_pde_pae_t; #define I386_PAGE_SHIFT 12 #define I386_PAGE_SIZE (1 << I386_PAGE_SHIFT) #define I386_PAGE_MASK (I386_PAGE_SIZE - 1) #define I386_NPTEPG (I386_PAGE_SIZE / sizeof(i386_pte_t)) #define I386_PDRSHIFT 22 #define I386_NBPDR (1 << I386_PDRSHIFT) #define I386_PAGE_PS_MASK (I386_NBPDR - 1) #define I386_NPTEPG_PAE (I386_PAGE_SIZE / sizeof(i386_pte_pae_t)) #define I386_PDRSHIFT_PAE 21 #define I386_NBPDR_PAE (1 << I386_PDRSHIFT_PAE) #define I386_PAGE_PS_MASK_PAE (I386_NBPDR_PAE - 1) /* Source: i386/include/pmap.h */ #define I386_PG_V 0x001 #define I386_PG_RW 0x002 #define I386_PG_PS 0x080 #define I386_PG_NX (1ULL << 63) #define I386_PG_FRAME_PAE (0x000ffffffffff000ull) #define I386_PG_PS_FRAME_PAE (0x000fffffffe00000ull) #define I386_PG_FRAME (0xfffff000) #define I386_PG_PS_FRAME (0xffc00000) #ifdef __i386__ _Static_assert(PAGE_SHIFT == I386_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == I386_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == I386_PAGE_MASK, "PAGE_MASK mismatch"); #if 0 _Static_assert(NPTEPG == I386_NPTEPG, "NPTEPG mismatch"); _Static_assert(NBPDR == I386_NBPDR, "NBPDR mismatch"); #endif _Static_assert(PDRSHIFT_NOPAE == I386_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(PG_V == I386_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == I386_PG_PS, "PG_PS mismatch"); _Static_assert((u_int)PG_FRAME_NOPAE == I386_PG_FRAME, "PG_FRAME mismatch"); _Static_assert(PG_PS_FRAME_NOPAE == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif int _i386_native(kvm_t *); #endif /* !__KVM_I386_H__ */ Index: head/lib/libkvm/kvm_mips.h =================================================================== --- head/lib/libkvm/kvm_mips.h (revision 344854) +++ head/lib/libkvm/kvm_mips.h (revision 344855) @@ -1,118 +1,117 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
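[Illustrative aside, not part of this commit.] A quick consequence of the i386 constants above: PAE doubles the PTE size, which halves the entries per page table page and shrinks superpages from 4M (PDRSHIFT 22) to 2M (PDRSHIFT_PAE 21).

#include <stdio.h>

int
main(void)
{
        /* Derived exactly as in the header above. */
        printf("non-PAE: %d PTEs/page, %dM superpages\n",
            4096 / 4, (1 << 22) >> 20);
        printf("PAE:     %d PTEs/page, %dM superpages\n",
            4096 / 8, (1 << 21) >> 20);
        return (0);
}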
* * $FreeBSD$ */ #ifndef __KVM_MIPS_H__ #define __KVM_MIPS_H__ #ifdef __mips__ #include #endif typedef uint64_t mips_physaddr_t; typedef uint32_t mips32_pte_t; typedef uint64_t mips64_pte_t; #define MIPS_PAGE_SHIFT 12 #define MIPS_PAGE_SIZE (1 << MIPS_PAGE_SHIFT) #define MIPS_PAGE_MASK (MIPS_PAGE_SIZE - 1) #define MIPS32_KSEG0_START 0x80000000 #define MIPS32_KSEG0_END 0x9fffffff #define MIPS32_KSEG1_START 0xa0000000 #define MIPS32_KSEG1_END 0xbfffffff #define MIPS64_KSEG0_START 0xffffffff80000000 #define MIPS64_KSEG0_END 0xffffffff9fffffff #define MIPS64_KSEG1_START 0xffffffffa0000000 #define MIPS64_KSEG1_END 0xffffffffbfffffff #define MIPS32_PFN_MASK (0x1FFFFFC0) #define MIPS64_PFN_MASK 0x3FFFFFFC0 #define MIPS_PFN_SHIFT (6) #define MIPS_PFN_TO_PA(pfn) (((pfn) >> MIPS_PFN_SHIFT) << MIPS_PAGE_SHIFT) #define MIPS32_PTE_TO_PFN(pte) ((pte) & MIPS32_PFN_MASK) #define MIPS32_PTE_TO_PA(pte) (MIPS_PFN_TO_PA(MIPS32_PTE_TO_PFN((pte)))) #define MIPS64_PTE_TO_PFN(pte) ((pte) & MIPS64_PFN_MASK) #define MIPS64_PTE_TO_PA(pte) (MIPS_PFN_TO_PA(MIPS64_PTE_TO_PFN((pte)))) #define MIPS32_SWBITS_SHIFT 29 #define MIPS64_SWBITS_SHIFT 55 #define MIPS_PTE_V 0x02 #define MIPS32_PTE_RO ((mips32_pte_t)0x01 << MIPS32_SWBITS_SHIFT) #define MIPS64_PTE_RO ((mips64_pte_t)0x01 << MIPS64_SWBITS_SHIFT) static inline mips32_pte_t _mips32_pte_get(kvm_t *kd, u_long pteindex) { mips32_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte)); return _kvm32toh(kd, *pte); } static inline mips64_pte_t _mips64_pte_get(kvm_t *kd, u_long pteindex) { mips64_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte)); return _kvm64toh(kd, *pte); } #ifdef __mips__ _Static_assert(PAGE_SHIFT == MIPS_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == MIPS_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == MIPS_PAGE_MASK, "PAGE_MASK mismatch"); #ifdef __mips_n64 _Static_assert((uint64_t)MIPS_KSEG0_START == MIPS64_KSEG0_START, "MIPS_KSEG0_START mismatch"); _Static_assert((uint64_t)MIPS_KSEG0_END == MIPS64_KSEG0_END, "MIPS_KSEG0_END mismatch"); _Static_assert((uint64_t)MIPS_KSEG1_START == MIPS64_KSEG1_START, "MIPS_KSEG1_START mismatch"); _Static_assert((uint64_t)MIPS_KSEG1_END == MIPS64_KSEG1_END, "MIPS_KSEG1_END mismatch"); #else _Static_assert((uint32_t)MIPS_KSEG0_START == MIPS32_KSEG0_START, "MIPS_KSEG0_START mismatch"); _Static_assert((uint32_t)MIPS_KSEG0_END == MIPS32_KSEG0_END, "MIPS_KSEG0_END mismatch"); _Static_assert((uint32_t)MIPS_KSEG1_START == MIPS32_KSEG1_START, "MIPS_KSEG1_START mismatch"); _Static_assert((uint32_t)MIPS_KSEG1_END == MIPS32_KSEG1_END, "MIPS_KSEG1_END mismatch"); #endif #if defined(__mips_n64) || defined(__mips_n32) _Static_assert(TLBLO_PFN_MASK == MIPS64_PFN_MASK, "TLBLO_PFN_MASK mismatch"); #else _Static_assert(TLBLO_PFN_MASK == MIPS32_PFN_MASK, "TLBLO_PFN_MASK mismatch"); #endif _Static_assert(TLBLO_PFN_SHIFT == MIPS_PFN_SHIFT, "TLBLO_PFN_SHIFT mismatch"); _Static_assert(TLB_PAGE_SHIFT == MIPS_PAGE_SHIFT, "TLB_PAGE_SHIFT mismatch"); #endif #endif /* !__KVM_MIPS_H__ */ Index: head/lib/libkvm/kvm_native.3 =================================================================== --- head/lib/libkvm/kvm_native.3 (revision 344854) +++ head/lib/libkvm/kvm_native.3 (revision 344855) @@ -1,62 +1,61 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. 
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd March 15, 2017 .Dt KVM_NATIVE 3 .Os .Sh NAME .Nm kvm_native .Nd is a kvm descriptor opened on a native kernel image .Sh LIBRARY .Lb libkvm .Sh SYNOPSIS .In kvm.h .Ft int .Fn kvm_native "kvm_t *kd" .Sh DESCRIPTION The .Nm kvm library provides an interface for accessing kernel virtual memory images for both native kernel images .Pq where the ABI of the kernel executable matches the host system and non-native kernel images. The .Fn kvm_native function returns a non-zero value if the kvm descriptor .Fa kd is attached to a native kernel image; otherwise it returns zero. .Sh RETURN VALUES The .Fn kvm_native function returns a non-zero value if the kvm descriptor .Fa kd is attached to a native kernel image; otherwise it returns zero. .Sh SEE ALSO .Xr kvm 3 , .Xr kvm_open2 3 Index: head/lib/libkvm/kvm_riscv.h =================================================================== --- head/lib/libkvm/kvm_riscv.h (revision 344854) +++ head/lib/libkvm/kvm_riscv.h (revision 344855) @@ -1,90 +1,89 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * Copyright (c) 2019 Mitchell Horne * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
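[Illustrative aside, not part of this commit.] A minimal caller of the function documented above might look like this; it assumes kvm_open2() and kvm_close() from kvm(3) and links with -lkvm.

#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
        char errbuf[_POSIX2_LINE_MAX];
        kvm_t *kd;

        if (argc != 3) {
                fprintf(stderr, "usage: kvmnative kernel core\n");
                return (1);
        }
        kd = kvm_open2(argv[1], argv[2], O_RDONLY, errbuf, NULL);
        if (kd == NULL) {
                fprintf(stderr, "kvm_open2: %s\n", errbuf);
                return (1);
        }
        printf("%s is %snative\n", argv[2], kvm_native(kd) ? "" : "non-");
        kvm_close(kd);
        return (0);
}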
* * $FreeBSD$ */ #ifndef __KVM_RISCV_H__ #define __KVM_RISCV_H__ #ifdef __riscv #include #endif typedef uint64_t riscv_physaddr_t; typedef uint64_t riscv_pt_entry_t; #define RISCV_PAGE_SHIFT 12 #define RISCV_PAGE_SIZE (1 << RISCV_PAGE_SHIFT) #define RISCV_PAGE_MASK (RISCV_PAGE_SIZE - 1) /* Source: sys/riscv/include/pte.h */ #define RISCV_L3_SHIFT 12 #define RISCV_L3_SIZE (1 << RISCV_L3_SHIFT) #define RISCV_L3_OFFSET (RISCV_L3_SIZE - 1) #define RISCV_PTE_SW_MANAGED (1 << 9) #define RISCV_PTE_SW_WIRED (1 << 8) #define RISCV_PTE_D (1 << 7) /* Dirty */ #define RISCV_PTE_A (1 << 6) /* Accessed */ #define RISCV_PTE_G (1 << 5) /* Global */ #define RISCV_PTE_U (1 << 4) /* User */ #define RISCV_PTE_X (1 << 3) /* Execute */ #define RISCV_PTE_W (1 << 2) /* Write */ #define RISCV_PTE_R (1 << 1) /* Read */ #define RISCV_PTE_V (1 << 0) /* Valid */ #define RISCV_PTE_RWX (RISCV_PTE_R | RISCV_PTE_W | RISCV_PTE_X) #define RISCV_PTE_PPN0_S 10 #ifdef __riscv _Static_assert(sizeof(pt_entry_t) == sizeof(riscv_pt_entry_t), "pt_entry_t size mismatch"); _Static_assert(PAGE_SHIFT == RISCV_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == RISCV_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == RISCV_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(L3_SHIFT == RISCV_L3_SHIFT, "L3_SHIFT mismatch"); _Static_assert(L3_SIZE == RISCV_L3_SIZE, "L3_SIZE mismatch"); _Static_assert(L3_OFFSET == RISCV_L3_OFFSET, "L3_OFFSET mismatch"); _Static_assert(PTE_PPN0_S == RISCV_PTE_PPN0_S, "PTE_PPN0_S mismatch"); _Static_assert(PTE_SW_MANAGED == RISCV_PTE_SW_MANAGED, "PTE_SW_MANAGED mismatch"); _Static_assert(PTE_SW_WIRED == RISCV_PTE_SW_WIRED, "PTE_SW_WIRED mismatch"); _Static_assert(PTE_D == RISCV_PTE_D, "PTE_D mismatch"); _Static_assert(PTE_A == RISCV_PTE_A, "PTE_A mismatch"); _Static_assert(PTE_G == RISCV_PTE_G, "PTE_G mismatch"); _Static_assert(PTE_U == RISCV_PTE_U, "PTE_U mismatch"); _Static_assert(PTE_X == RISCV_PTE_X, "PTE_X mismatch"); _Static_assert(PTE_W == RISCV_PTE_W, "PTE_W mismatch"); _Static_assert(PTE_R == RISCV_PTE_R, "PTE_R mismatch"); _Static_assert(PTE_V == RISCV_PTE_V, "PTE_V mismatch"); _Static_assert(PTE_RWX == RISCV_PTE_RWX, "PTE_RWX mismatch"); #endif #endif /* !__KVM_RISCV_H__ */ Index: head/lib/libkvm/kvm_sparc64.h =================================================================== --- head/lib/libkvm/kvm_sparc64.h (revision 344854) +++ head/lib/libkvm/kvm_sparc64.h (revision 344855) @@ -1,118 +1,117 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
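[Illustrative aside, not part of this commit.] Arithmetic with the RISC-V constants above: in a leaf PTE the physical page number sits above the flag bits at RISCV_PTE_PPN0_S, so shifting it back up by the page shift yields a byte address (hypothetical PTE value).

#include <stdint.h>
#include <stdio.h>

#define PTE_PPN0_S  10          /* RISCV_PTE_PPN0_S */
#define PTE_V       (1 << 0)
#define PTE_R       (1 << 1)
#define PAGE_SHIFT  12          /* RISCV_PAGE_SHIFT */

int
main(void)
{
        uint64_t pte = 0x200000cfULL;   /* hypothetical leaf PTE */

        if ((pte & PTE_V) == 0 || (pte & PTE_R) == 0) {
                printf("not a readable leaf\n");
                return (1);
        }
        /* PPN 0x80000 -> pa 0x80000000. */
        printf("pa %#jx\n", (uintmax_t)((pte >> PTE_PPN0_S) << PAGE_SHIFT));
        return (0);
}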
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __KVM_SPARC64_H__ #define __KVM_SPARC64_H__ #ifdef __sparc64__ #include #include #include #include #include #endif #define SPARC64_PAGE_SHIFT 13 #define SPARC64_PAGE_SIZE (1 << SPARC64_PAGE_SHIFT) #define SPARC64_PAGE_MASK (SPARC64_PAGE_SIZE - 1) #define SPARC64_MIN_DIRECT_ADDRESS (0xfffff80000000000) #define SPARC64_DIRECT_ADDRESS_BITS (43) #define SPARC64_DIRECT_ADDRESS_MASK \ (((uint64_t)1 << SPARC64_DIRECT_ADDRESS_BITS) - 1) #define SPARC64_DIRECT_TO_PHYS(va) ((va) & SPARC64_DIRECT_ADDRESS_MASK) #define SPARC64_TTE_SHIFT (5) #define SPARC64_TD_SIZE_SHIFT (61) #define SPARC64_TD_PA_SHIFT (13) #define SPARC64_TD_SIZE_BITS (2) #define SPARC64_TD_PA_CH_BITS (30) /* US-III{,i,+}, US-IV{,+}, SPARC64 V */ #define SPARC64_TD_PA_BITS SPARC64_TD_PA_CH_BITS #define SPARC64_TD_SIZE_MASK (((uint64_t)1 << SPARC64_TD_SIZE_BITS) - 1) #define SPARC64_TD_PA_MASK (((uint64_t)1 << SPARC64_TD_PA_BITS) - 1) #define SPARC64_TD_V ((uint64_t)1 << 63) #define SPARC64_TV_SIZE_BITS (SPARC64_TD_SIZE_BITS) #define SPARC64_TV_VPN(va, sz) \ ((((va) >> SPARC64_TTE_PAGE_SHIFT(sz)) << SPARC64_TV_SIZE_BITS) | sz) #define SPARC64_TTE_SIZE_SPREAD (3) #define SPARC64_TTE_PAGE_SHIFT(sz) \ (SPARC64_PAGE_SHIFT + ((sz) * SPARC64_TTE_SIZE_SPREAD)) #define SPARC64_TTE_GET_SIZE(tp) \ (((tp)->tte_data >> SPARC64_TD_SIZE_SHIFT) & SPARC64_TD_SIZE_MASK) #define SPARC64_TTE_GET_PA(tp) \ ((tp)->tte_data & (SPARC64_TD_PA_MASK << SPARC64_TD_PA_SHIFT)) struct sparc64_tte { uint64_t tte_vpn; uint64_t tte_data; }; static __inline int sparc64_tte_match(struct sparc64_tte *tp, kvaddr_t va) { return (((tp->tte_data & SPARC64_TD_V) != 0) && (tp->tte_vpn == SPARC64_TV_VPN(va, SPARC64_TTE_GET_SIZE(tp)))); } #ifdef __sparc64__ _Static_assert(PAGE_SHIFT == SPARC64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == SPARC64_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == SPARC64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(VM_MIN_DIRECT_ADDRESS == SPARC64_MIN_DIRECT_ADDRESS, "VM_MIN_DIRECT_ADDRESS mismatch"); _Static_assert(TLB_DIRECT_ADDRESS_BITS == SPARC64_DIRECT_ADDRESS_BITS, "TLB_DIRECT_ADDRESS_BITS mismatch"); _Static_assert(TLB_DIRECT_ADDRESS_MASK == SPARC64_DIRECT_ADDRESS_MASK, "TLB_DIRECT_ADDRESS_MASK mismatch"); _Static_assert(TTE_SHIFT == SPARC64_TTE_SHIFT, "TTE_SHIFT mismatch"); _Static_assert(TD_SIZE_SHIFT == SPARC64_TD_SIZE_SHIFT, "TD_SIZE_SHIFT mismatch"); _Static_assert(TD_PA_SHIFT == SPARC64_TD_PA_SHIFT, "TD_PA_SHIFT mismatch"); _Static_assert(TD_SIZE_BITS == SPARC64_TD_SIZE_BITS, "TD_SIZE_BITS mismatch"); _Static_assert(TD_PA_BITS == SPARC64_TD_PA_BITS, "TD_PA_BITS mismatch"); _Static_assert(TD_SIZE_MASK == SPARC64_TD_SIZE_MASK, "TD_SIZE_MASK mismatch"); _Static_assert(TD_PA_MASK == SPARC64_TD_PA_MASK, "TD_PA_MASK mismatch"); _Static_assert(TD_V == SPARC64_TD_V, "TD_V mismatch"); _Static_assert(TV_SIZE_BITS == SPARC64_TV_SIZE_BITS, "TV_SIZE_BITS mismatch"); _Static_assert(TTE_SIZE_SPREAD == SPARC64_TTE_SIZE_SPREAD, "TTE_SIZE_SPREAD mismatch"); #endif #endif /* 
!__KVM_SPARC64_H__ */ Index: head/lib/libsysdecode/errno.c =================================================================== --- head/lib/libsysdecode/errno.c (revision 344854) +++ head/lib/libsysdecode/errno.c (revision 344855) @@ -1,191 +1,190 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) static #include #endif #include static const int cloudabi_errno_table[] = { [CLOUDABI_E2BIG] = E2BIG, [CLOUDABI_EACCES] = EACCES, [CLOUDABI_EADDRINUSE] = EADDRINUSE, [CLOUDABI_EADDRNOTAVAIL] = EADDRNOTAVAIL, [CLOUDABI_EAFNOSUPPORT] = EAFNOSUPPORT, [CLOUDABI_EAGAIN] = EAGAIN, [CLOUDABI_EALREADY] = EALREADY, [CLOUDABI_EBADF] = EBADF, [CLOUDABI_EBADMSG] = EBADMSG, [CLOUDABI_EBUSY] = EBUSY, [CLOUDABI_ECANCELED] = ECANCELED, [CLOUDABI_ECHILD] = ECHILD, [CLOUDABI_ECONNABORTED] = ECONNABORTED, [CLOUDABI_ECONNREFUSED] = ECONNREFUSED, [CLOUDABI_ECONNRESET] = ECONNRESET, [CLOUDABI_EDEADLK] = EDEADLK, [CLOUDABI_EDESTADDRREQ] = EDESTADDRREQ, [CLOUDABI_EDOM] = EDOM, [CLOUDABI_EDQUOT] = EDQUOT, [CLOUDABI_EEXIST] = EEXIST, [CLOUDABI_EFAULT] = EFAULT, [CLOUDABI_EFBIG] = EFBIG, [CLOUDABI_EHOSTUNREACH] = EHOSTUNREACH, [CLOUDABI_EIDRM] = EIDRM, [CLOUDABI_EILSEQ] = EILSEQ, [CLOUDABI_EINPROGRESS] = EINPROGRESS, [CLOUDABI_EINTR] = EINTR, [CLOUDABI_EINVAL] = EINVAL, [CLOUDABI_EIO] = EIO, [CLOUDABI_EISCONN] = EISCONN, [CLOUDABI_EISDIR] = EISDIR, [CLOUDABI_ELOOP] = ELOOP, [CLOUDABI_EMFILE] = EMFILE, [CLOUDABI_EMLINK] = EMLINK, [CLOUDABI_EMSGSIZE] = EMSGSIZE, [CLOUDABI_EMULTIHOP] = EMULTIHOP, [CLOUDABI_ENAMETOOLONG] = ENAMETOOLONG, [CLOUDABI_ENETDOWN] = ENETDOWN, [CLOUDABI_ENETRESET] = ENETRESET, [CLOUDABI_ENETUNREACH] = ENETUNREACH, [CLOUDABI_ENFILE] = ENFILE, [CLOUDABI_ENOBUFS] = ENOBUFS, [CLOUDABI_ENODEV] = ENODEV, [CLOUDABI_ENOENT] = ENOENT, [CLOUDABI_ENOEXEC] = ENOEXEC, [CLOUDABI_ENOLCK] = ENOLCK, [CLOUDABI_ENOLINK] = ENOLINK, [CLOUDABI_ENOMEM] = ENOMEM, [CLOUDABI_ENOMSG] = ENOMSG, [CLOUDABI_ENOPROTOOPT] = ENOPROTOOPT, [CLOUDABI_ENOSPC] = ENOSPC, [CLOUDABI_ENOSYS] = ENOSYS, [CLOUDABI_ENOTCONN] = ENOTCONN, [CLOUDABI_ENOTDIR] = ENOTDIR, [CLOUDABI_ENOTEMPTY] = ENOTEMPTY, [CLOUDABI_ENOTRECOVERABLE] = 
ENOTRECOVERABLE, [CLOUDABI_ENOTSOCK] = ENOTSOCK, [CLOUDABI_ENOTSUP] = ENOTSUP, [CLOUDABI_ENOTTY] = ENOTTY, [CLOUDABI_ENXIO] = ENXIO, [CLOUDABI_EOVERFLOW] = EOVERFLOW, [CLOUDABI_EOWNERDEAD] = EOWNERDEAD, [CLOUDABI_EPERM] = EPERM, [CLOUDABI_EPIPE] = EPIPE, [CLOUDABI_EPROTO] = EPROTO, [CLOUDABI_EPROTONOSUPPORT] = EPROTONOSUPPORT, [CLOUDABI_EPROTOTYPE] = EPROTOTYPE, [CLOUDABI_ERANGE] = ERANGE, [CLOUDABI_EROFS] = EROFS, [CLOUDABI_ESPIPE] = ESPIPE, [CLOUDABI_ESRCH] = ESRCH, [CLOUDABI_ESTALE] = ESTALE, [CLOUDABI_ETIMEDOUT] = ETIMEDOUT, [CLOUDABI_ETXTBSY] = ETXTBSY, [CLOUDABI_EXDEV] = EXDEV, [CLOUDABI_ENOTCAPABLE] = ENOTCAPABLE, }; int sysdecode_abi_to_freebsd_errno(enum sysdecode_abi abi, int error) { switch (abi) { case SYSDECODE_ABI_FREEBSD: case SYSDECODE_ABI_FREEBSD32: return (error); #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) case SYSDECODE_ABI_LINUX: case SYSDECODE_ABI_LINUX32: { unsigned int i; /* * This is imprecise since it returns the first * matching errno. */ for (i = 0; i < nitems(linux_errtbl); i++) { if (error == linux_errtbl[i]) return (i); } break; } #endif case SYSDECODE_ABI_CLOUDABI32: case SYSDECODE_ABI_CLOUDABI64: if (error >= 0 && (unsigned int)error < nitems(cloudabi_errno_table)) return (cloudabi_errno_table[error]); break; default: break; } return (INT_MAX); } int sysdecode_freebsd_to_abi_errno(enum sysdecode_abi abi, int error) { switch (abi) { case SYSDECODE_ABI_FREEBSD: case SYSDECODE_ABI_FREEBSD32: return (error); #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) case SYSDECODE_ABI_LINUX: case SYSDECODE_ABI_LINUX32: if (error >= 0 && error <= ELAST) return (linux_errtbl[error]); break; #endif case SYSDECODE_ABI_CLOUDABI32: case SYSDECODE_ABI_CLOUDABI64: { unsigned int i; for (i = 0; i < nitems(cloudabi_errno_table); i++) { if (error == cloudabi_errno_table[i]) return (i); } break; } default: break; } return (INT_MAX); } Index: head/lib/libsysdecode/signal.c =================================================================== --- head/lib/libsysdecode/signal.c (revision 344854) +++ head/lib/libsysdecode/signal.c (revision 344855) @@ -1,143 +1,142 @@ /*- * Copyright (c) 2016 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include static const char *signames[] = { [SIGHUP] = "SIGHUP", [SIGINT] = "SIGINT", [SIGQUIT] = "SIGQUIT", [SIGILL] = "SIGILL", [SIGTRAP] = "SIGTRAP", [SIGABRT] = "SIGABRT", [SIGEMT] = "SIGEMT", [SIGFPE] = "SIGFPE", [SIGKILL] = "SIGKILL", [SIGBUS] = "SIGBUS", [SIGSEGV] = "SIGSEGV", [SIGSYS] = "SIGSYS", [SIGPIPE] = "SIGPIPE", [SIGALRM] = "SIGALRM", [SIGTERM] = "SIGTERM", [SIGURG] = "SIGURG", [SIGSTOP] = "SIGSTOP", [SIGTSTP] = "SIGTSTP", [SIGCONT] = "SIGCONT", [SIGCHLD] = "SIGCHLD", [SIGTTIN] = "SIGTTIN", [SIGTTOU] = "SIGTTOU", [SIGIO] = "SIGIO", [SIGXCPU] = "SIGXCPU", [SIGXFSZ] = "SIGXFSZ", [SIGVTALRM] = "SIGVTALRM", [SIGPROF] = "SIGPROF", [SIGWINCH] = "SIGWINCH", [SIGINFO] = "SIGINFO", [SIGUSR1] = "SIGUSR1", [SIGUSR2] = "SIGUSR2", [SIGTHR] = "SIGTHR", [SIGLIBRT] = "SIGLIBRT", /* XXX: Solaris uses SIGRTMIN, SIGRTMIN+...SIGRTMAX-, SIGRTMAX */ [SIGRTMIN] = "SIGRT0", [SIGRTMIN + 1] = "SIGRT1", [SIGRTMIN + 2] = "SIGRT2", [SIGRTMIN + 3] = "SIGRT3", [SIGRTMIN + 4] = "SIGRT4", [SIGRTMIN + 5] = "SIGRT5", [SIGRTMIN + 6] = "SIGRT6", [SIGRTMIN + 7] = "SIGRT7", [SIGRTMIN + 8] = "SIGRT8", [SIGRTMIN + 9] = "SIGRT9", [SIGRTMIN + 10] = "SIGRT10", [SIGRTMIN + 11] = "SIGRT11", [SIGRTMIN + 12] = "SIGRT12", [SIGRTMIN + 13] = "SIGRT13", [SIGRTMIN + 14] = "SIGRT14", [SIGRTMIN + 15] = "SIGRT15", [SIGRTMIN + 16] = "SIGRT16", [SIGRTMIN + 17] = "SIGRT17", [SIGRTMIN + 18] = "SIGRT18", [SIGRTMIN + 19] = "SIGRT19", [SIGRTMIN + 20] = "SIGRT20", [SIGRTMIN + 21] = "SIGRT21", [SIGRTMIN + 22] = "SIGRT22", [SIGRTMIN + 23] = "SIGRT23", [SIGRTMIN + 24] = "SIGRT24", [SIGRTMIN + 25] = "SIGRT25", [SIGRTMIN + 26] = "SIGRT26", [SIGRTMIN + 27] = "SIGRT27", [SIGRTMIN + 28] = "SIGRT28", [SIGRTMIN + 29] = "SIGRT29", [SIGRTMIN + 30] = "SIGRT30", [SIGRTMIN + 31] = "SIGRT31", [SIGRTMIN + 32] = "SIGRT32", [SIGRTMIN + 33] = "SIGRT33", [SIGRTMIN + 34] = "SIGRT34", [SIGRTMIN + 35] = "SIGRT35", [SIGRTMIN + 36] = "SIGRT36", [SIGRTMIN + 37] = "SIGRT37", [SIGRTMIN + 38] = "SIGRT38", [SIGRTMIN + 39] = "SIGRT39", [SIGRTMIN + 40] = "SIGRT40", [SIGRTMIN + 41] = "SIGRT41", [SIGRTMIN + 42] = "SIGRT42", [SIGRTMIN + 43] = "SIGRT43", [SIGRTMIN + 44] = "SIGRT44", [SIGRTMIN + 45] = "SIGRT45", [SIGRTMIN + 46] = "SIGRT46", [SIGRTMIN + 47] = "SIGRT47", [SIGRTMIN + 48] = "SIGRT48", [SIGRTMIN + 49] = "SIGRT49", [SIGRTMIN + 50] = "SIGRT50", [SIGRTMIN + 51] = "SIGRT51", [SIGRTMIN + 52] = "SIGRT52", [SIGRTMIN + 53] = "SIGRT53", [SIGRTMIN + 54] = "SIGRT54", [SIGRTMIN + 55] = "SIGRT55", [SIGRTMIN + 56] = "SIGRT56", [SIGRTMIN + 57] = "SIGRT57", [SIGRTMIN + 58] = "SIGRT58", [SIGRTMIN + 59] = "SIGRT59", [SIGRTMIN + 60] = "SIGRT60", [SIGRTMIN + 61] = "SIGRT61", }; const char * sysdecode_signal(int sig) { if ((unsigned)sig < nitems(signames)) return (signames[sig]); return (NULL); } Index: head/lib/libsysdecode/syscallnames.c =================================================================== --- head/lib/libsysdecode/syscallnames.c (revision 344854) +++ head/lib/libsysdecode/syscallnames.c (revision 344855) @@ -1,112 +1,111 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Map system call codes to names for the supported ABIs on each * platform. Rather than regenerating system call name tables locally * during the build, use the generated tables in the kernel source * tree. */ #include #include #include #include #include #include static #include #if defined(__amd64__) || defined(__powerpc64__) static #include #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) static #ifdef __aarch64__ #include #elif __amd64__ #include #else #include #endif #endif #ifdef __amd64__ static #include #endif static #include static #include const char * sysdecode_syscallname(enum sysdecode_abi abi, unsigned int code) { switch (abi) { case SYSDECODE_ABI_FREEBSD: if (code < nitems(syscallnames)) return (syscallnames[code]); break; #if defined(__amd64__) || defined(__powerpc64__) case SYSDECODE_ABI_FREEBSD32: if (code < nitems(freebsd32_syscallnames)) return (freebsd32_syscallnames[code]); break; #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) case SYSDECODE_ABI_LINUX: if (code < nitems(linux_syscallnames)) return (linux_syscallnames[code]); break; #endif #ifdef __amd64__ case SYSDECODE_ABI_LINUX32: if (code < nitems(linux32_syscallnames)) return (linux32_syscallnames[code]); break; #endif case SYSDECODE_ABI_CLOUDABI32: if (code < nitems(cloudabi32_syscallnames)) return (cloudabi32_syscallnames[code]); break; case SYSDECODE_ABI_CLOUDABI64: if (code < nitems(cloudabi64_syscallnames)) return (cloudabi64_syscallnames[code]); break; default: break; } return (NULL); } Index: head/lib/libsysdecode/sysdecode.3 =================================================================== --- head/lib/libsysdecode/sysdecode.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode.3 (revision 344855) @@ -1,93 +1,92 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution.
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd April 26, 2018 .Dt SYSDECODE 3 .Os .Sh NAME .Nm sysdecode .Nd system argument decoding library .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Sh DESCRIPTION The .Nm library includes several functions that provide descriptive names of values associated with system calls. .Ss Supported ABIs Some functions in this library provide ABI-specific descriptions. The supported ABIs are named by the .Vt enum sysdecode_abi enumeration. .Pp .Bl -tag -width "Li SYSDECODE_ABI_CLOUDABI64" -compact .It Li SYSDECODE_ABI_FREEBSD Native FreeBSD binaries. Supported on all platforms. .It Li SYSDECODE_ABI_FREEBSD32 32-bit FreeBSD binaries. Supported on amd64 and powerpc64. .It Li SYSDECODE_ABI_LINUX Linux binaries of the same platform. Supported on amd64, i386, and arm64. .It Li SYSDECODE_ABI_LINUX32 32-bit Linux binaries. Supported on amd64. .It Li SYSDECODE_ABI_CLOUDABI32 32-bit CloudABI binaries. Supported on all platforms. .It Li SYSDECODE_ABI_CLOUDABI64 64-bit CloudABI binaries. Supported on all platforms. .It Li SYSDECODE_ABI_UNKNOWN A placeholder for use when the ABI is not known. .El .Sh SEE ALSO .Xr sysdecode_abi_to_freebsd_errno 3 , .Xr sysdecode_cap_rights 3 , .Xr sysdecode_cmsg_type 3 , .Xr sysdecode_enum 3 , .Xr sysdecode_fcntl_arg 3 , .Xr sysdecode_ioctlname 3 , .Xr sysdecode_kevent 3 , .Xr sysdecode_mask 3 , .Xr sysdecode_quotactl_cmd 3 , .Xr sysdecode_sctp_sinfo_flags 3 , .Xr sysdecode_sigcode 3 , .Xr sysdecode_socket_protocol 3 , .Xr sysdecode_sockopt_name 3 , .Xr sysdecode_syscallnames 3 , .Xr sysdecode_utrace 3 .Sh HISTORY The .Nm library first appeared in .Fx 11.0 . Index: head/lib/libsysdecode/sysdecode.h =================================================================== --- head/lib/libsysdecode/sysdecode.h (revision 344854) +++ head/lib/libsysdecode/sysdecode.h (revision 344855) @@ -1,133 +1,132 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYSDECODE_H__ #define __SYSDECODE_H__ enum sysdecode_abi { SYSDECODE_ABI_UNKNOWN = 0, SYSDECODE_ABI_FREEBSD, SYSDECODE_ABI_FREEBSD32, SYSDECODE_ABI_LINUX, SYSDECODE_ABI_LINUX32, SYSDECODE_ABI_CLOUDABI64, SYSDECODE_ABI_CLOUDABI32 }; int sysdecode_abi_to_freebsd_errno(enum sysdecode_abi _abi, int _error); bool sysdecode_access_mode(FILE *_fp, int _mode, int *_rem); const char *sysdecode_acltype(int _type); const char *sysdecode_atfd(int _fd); bool sysdecode_atflags(FILE *_fp, int _flags, int *_rem); bool sysdecode_cap_fcntlrights(FILE *_fp, uint32_t _rights, uint32_t *_rem); void sysdecode_cap_rights(FILE *_fp, cap_rights_t *_rightsp); const char *sysdecode_cmsg_type(int _cmsg_level, int _cmsg_type); const char *sysdecode_extattrnamespace(int _namespace); const char *sysdecode_fadvice(int _advice); void sysdecode_fcntl_arg(FILE *_fp, int _cmd, uintptr_t _arg, int _base); bool sysdecode_fcntl_arg_p(int _cmd); const char *sysdecode_fcntl_cmd(int _cmd); bool sysdecode_fcntl_fileflags(FILE *_fp, int _flags, int *_rem); bool sysdecode_fileflags(FILE *_fp, fflags_t _flags, fflags_t *_rem); bool sysdecode_filemode(FILE *_fp, int _mode, int *_rem); bool sysdecode_flock_operation(FILE *_fp, int _operation, int *_rem); int sysdecode_freebsd_to_abi_errno(enum sysdecode_abi _abi, int _error); const char *sysdecode_getfsstat_mode(int _mode); const char *sysdecode_getrusage_who(int _who); const char *sysdecode_idtype(int _idtype); const char *sysdecode_ioctlname(unsigned long _val); const char *sysdecode_ipproto(int _protocol); void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags, int _base); const char *sysdecode_kevent_filter(int _filter); bool sysdecode_kevent_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_kldsym_cmd(int _cmd); const char *sysdecode_kldunload_flags(int _flags); const char *sysdecode_lio_listio_mode(int _mode); const char *sysdecode_madvice(int _advice); const char *sysdecode_minherit_inherit(int _inherit); const char *sysdecode_msgctl_cmd(int _cmd); bool sysdecode_mlockall_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_mmap_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_mmap_prot(FILE *_fp, int _prot, int *_rem); bool sysdecode_mount_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_msg_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_msync_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_nfssvc_flags(int _flags); bool sysdecode_open_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_pathconf_name(int _name); bool sysdecode_pipe2_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_prio_which(int _which); const char *sysdecode_procctl_cmd(int _cmd); const char *sysdecode_ptrace_request(int _request); bool sysdecode_quotactl_cmd(FILE *_fp, int _cmd); bool sysdecode_reboot_howto(FILE *_fp, int _howto, int *_rem); bool sysdecode_rfork_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_rlimit(int _resource); const char 
*sysdecode_rtprio_function(int _function); const char *sysdecode_scheduler_policy(int _policy); bool sysdecode_sctp_nxt_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_sctp_pr_policy(int _policy); bool sysdecode_sctp_rcv_flags(FILE *_fp, int _flags, int *_rem); void sysdecode_sctp_sinfo_flags(FILE *_fp, int _sinfo_flags); bool sysdecode_sctp_snd_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_semctl_cmd(int _cmd); bool sysdecode_semget_flags(FILE *_fp, int _flag, int *_rem); bool sysdecode_sendfile_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_shmat_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_shmctl_cmd(int _cmd); const char *sysdecode_shutdown_how(int _how); const char *sysdecode_sigbus_code(int _si_code); const char *sysdecode_sigchld_code(int _si_code); const char *sysdecode_sigcode(int _sig, int _si_code); const char *sysdecode_sigfpe_code(int _si_code); const char *sysdecode_sigill_code(int _si_code); const char *sysdecode_signal(int _sig); const char *sysdecode_sigprocmask_how(int _how); const char *sysdecode_sigsegv_code(int _si_code); const char *sysdecode_sigtrap_code(int _si_code); const char *sysdecode_sockaddr_family(int _sa_family); const char *sysdecode_socketdomain(int _domain); const char *sysdecode_socket_protocol(int _domain, int _protocol); bool sysdecode_socket_type(FILE *_fp, int _type, int *_rem); const char *sysdecode_sockopt_level(int _level); const char *sysdecode_sockopt_name(int _level, int _optname); const char *sysdecode_syscallname(enum sysdecode_abi _abi, unsigned int _code); const char *sysdecode_sysarch_number(int _number); bool sysdecode_thr_create_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_umtx_cvwait_flags(FILE *_fp, u_long _flags, u_long *_rem); const char *sysdecode_umtx_op(int _op); bool sysdecode_umtx_rwlock_flags(FILE *_fp, u_long _flags, u_long *_rem); int sysdecode_utrace(FILE *_fp, void *_buf, size_t _len); bool sysdecode_vmprot(FILE *_fp, int _type, int *_rem); const char *sysdecode_vmresult(int _result); bool sysdecode_wait4_options(FILE *_fp, int _options, int *_rem); bool sysdecode_wait6_options(FILE *_fp, int _options, int *_rem); const char *sysdecode_whence(int _whence); #endif /* !__SYSDECODE_H__ */ Index: head/lib/libsysdecode/sysdecode_abi_to_freebsd_errno.3 =================================================================== --- head/lib/libsysdecode/sysdecode_abi_to_freebsd_errno.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_abi_to_freebsd_errno.3 (revision 344855) @@ -1,97 +1,96 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_abi_to_freebsd_errno 3 .Os .Sh NAME .Nm sysdecode_abi_to_freebsd_errno , .Nm sysdecode_freebsd_to_abi_errno .Nd translate error numbers between process ABIs .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft int .Fn sysdecode_abi_to_freebsd_errno "enum sysdecode_abi abi" "int error" .Ft int .Fn sysdecode_freebsd_to_abi_errno "enum sysdecode_abi abi" "int error" .Sh DESCRIPTION The .Fn sysdecode_abi_to_freebsd_errno function returns the native .Xr errno 2 value that corresponds to the error indicated by .Fa error for the process ABI .Fa abi . If .Fa error does not identify a valid error for .Fa abi , .Dv INT_MAX is returned. .Pp The .Fn sysdecode_freebsd_to_abi_errno function returns the error value for the process ABI .Fa abi that corresponds to the native .Xr errno 2 value .Fa error . If .Fa error does not identify a valid .Xr errno 2 error, .Dv INT_MAX is returned. .Pp Note that the mappings between native .Xr errno 2 values and errors for other ABIs are not exhaustive. If a mapping does not exist, these functions return .Dv INT_MAX . In addition, multiple error values in one ABI may map to a single error in another ABI. .Sh RETURN VALUES These functions return an error value on success or .Dv INT_MAX if .Fa error is not valid. .Pp For the list of supported ABIs, see .Xr sysdecode 3 . .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_syscallnames 3 Index: head/lib/libsysdecode/sysdecode_cap_rights.3 =================================================================== --- head/lib/libsysdecode/sysdecode_cap_rights.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_cap_rights.3 (revision 344855) @@ -1,51 +1,50 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_cap_rights 3 .Os .Sh NAME .Nm sysdecode_cap_rights .Nd output list of capability rights .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft void .Fn sysdecode_cap_rights "FILE *fp" "cap_rights_t *rightsp" .Sh DESCRIPTION The .Fn sysdecode_cap_rights function outputs a comma-separated list of capability rights at .Fa rightsp to the stream .Fa fp . .Sh SEE ALSO .Xr sysdecode 3 Index: head/lib/libsysdecode/sysdecode_enum.3 =================================================================== --- head/lib/libsysdecode/sysdecode_enum.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_enum.3 (revision 344855) @@ -1,257 +1,256 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd January 14, 2018 .Dt sysdecode_enum 3 .Os .Sh NAME .Nm sysdecode_enum , .Nm sysdecode_acltype , .Nm sysdecode_atfd , .Nm sysdecode_extattrnamespace , .Nm sysdecode_fadvice , .Nm sysdecode_fcntl_cmd , .Nm sysdecode_getfsstat_mode , .Nm sysdecode_getrusage_who , .Nm sysdecode_idtype , .Nm sysdecode_ipproto , .Nm sysdecode_kldsym_cmd , .Nm sysdecode_kldunload_flags , .Nm sysdecode_lio_listio_mode , .Nm sysdecode_madvice , .Nm sysdecode_minherit_flags , .Nm sysdecode_msgctl_cmd , .Nm sysdecode_nfssvc_flags , .Nm sysdecode_pathconf_name , .Nm sysdecode_prio_which , .Nm sysdecode_procctl_cmd , .Nm sysdecode_ptrace_request , .Nm sysdecode_rlimit , .Nm sysdecode_rtprio_function , .Nm sysdecode_scheduler_policy , .Nm sysdecode_sctp_pr_policy , .Nm sysdecode_sctp_sinfo_flags , .Nm sysdecode_semctl_cmd , .Nm sysdecode_shmctl_cmd , .Nm sysdecode_shutdown_how , .Nm sysdecode_sigbus_code , .Nm sysdecode_sigchld_code , .Nm sysdecode_sigfpe_code , .Nm sysdecode_sigill_code , .Nm sysdecode_signal , .Nm sysdecode_sigprocmask_how , .Nm sysdecode_sigsegv_code , .Nm sysdecode_sigtrap_code , .Nm sysdecode_sockaddr_family , .Nm sysdecode_socketdomain , .Nm sysdecode_sockettype , .Nm sysdecode_sockopt_level , .Nm sysdecode_sysarch_number , .Nm sysdecode_umtx_op , .Nm sysdecode_vmresult , .Nm sysdecode_whence .Nd lookup name of various enumerated values .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_acltype "int type" .Ft const char * .Fn sysdecode_atfd "int fd" .Ft const char * .Fn sysdecode_extattrnamespace "int namespace" .Ft const char * .Fn sysdecode_fadvice "int advice" .Ft const char * .Fn sysdecode_fcntl_cmd "int cmd" .Ft const char * .Fn sysdecode_getfsstat_mode "int mode" .Ft const char * .Fn sysdecode_getrusage_who "int who" .Ft const char * .Fn sysdecode_idtype "int idtype" .Ft const char * .Fn sysdecode_ipproto "int protocol" .Ft const char * .Fn sysdecode_kldsym_cmd "int cmd" .Ft const char * .Fn sysdecode_kldunload_flags "int flags" .Ft const char * .Fn sysdecode_lio_listio_mode "int mode" .Ft const char * .Fn sysdecode_madvice "int advice" .Ft const char * .Fn sysdecode_minherit_flags "int inherit" .Ft const char * .Fn sysdecode_msgctl_cmd "int cmd" .Ft const char * .Fn sysdecode_nfssvc_flags "int flags" .Ft const char * .Fn sysdecode_pathconf_name "int name" .Ft const char * .Fn sysdecode_prio_which "int which" .Ft const char * .Fn sysdecode_procctl_cmd "int cmd" .Ft const char * .Fn sysdecode_ptrace_request "int request" .Ft const char * .Fn sysdecode_rlimit "int resource" .Ft const char * .Fn sysdecode_rtprio_function "int function" .Ft const char * .Fn sysdecode_scheduler_policy "int policy" .Ft const char * .Fn sysdecode_sctp_pr_policy "int policy" .Ft const char * .Fn sysdecode_semctl_cmd "int cmd" .Ft const char * .Fn sysdecode_shmctl_cmd "int cmd" .Ft const char * .Fn sysdecode_shutdown_how "int how" .Ft const char * .Fn sysdecode_sigbus_code "int si_code" .Ft const char * .Fn sysdecode_sigchld_code "int si_code" .Ft const char * .Fn sysdecode_sigfpe_code "int si_code" .Ft const char * .Fn sysdecode_sigill_code "int si_code" .Ft const char * .Fn sysdecode_signal "int sig" .Ft const char * .Fn sysdecode_sigprocmask_how "int how" .Ft const char * .Fn sysdecode_sigsegv_code "int si_code" .Ft const char * .Fn sysdecode_sigtrap_code "int si_code" .Ft const char * .Fn sysdecode_sockaddr_family "int sa_family" .Ft const char * .Fn sysdecode_socketdomain "int domain" .Ft const char * .Fn 
sysdecode_sockettype "int type" .Ft const char * .Fn sysdecode_sockopt_level "int level" .Ft const char * .Fn sysdecode_sysarch_number "int number" .Ft const char * .Fn sysdecode_umtx_op "int op" .Ft const char * .Fn sysdecode_vmresult "int result" .Ft const char * .Fn sysdecode_whence "int whence" .Sh DESCRIPTION The .Nm functions return a text description of an integer value. The text description matches the name of a C macro with the same value as the sole function argument. .Dv NULL is returned if there is no matching C macro name. .Pp Most of these functions decode an argument passed to a system call: .Bl -column "Fn sysdecode_extattrnamespace" "Xr sched_setscheduler 2" .It Sy Function Ta Sy System Call Ta Sy Argument .It Fn sysdecode_acltype Ta Xr acl_get_file 3 Ta Fa type .It Fn sysdecode_atfd Ta Xr openat 2 Ta Fa fd .It Fn sysdecode_extattrnamespace Ta Xr extattr_get_fd 2 Ta Fa attrnamespace .It Fn sysdecode_fadvice Ta Xr posix_fadvise 2 Ta Fa advice .It Fn sysdecode_fcntl_cmd Ta Xr fcntl 2 Ta Fa cmd .It Fn sysdecode_getfsstat_mode Ta Xr getfsstat 2 Ta Fa mode .It Fn sysdecode_idtype Ta .Xr procctl 2 , .Xr waitid 2 .Ta Fa idtype .It Fn sysdecode_kldsym_cmd Ta Xr kldsym 2 Ta Fa cmd .It Fn sysdecode_kldunload_flags Ta Xr kldunloadf 2 Ta Fa flags .It Fn sysdecode_lio_listio_mode Ta Xr lio_listio 2 Ta Fa mode .It Fn sysdecode_madvice Ta Xr madvise 2 Ta Fa advice .It Fn sysdecode_minherit_inherit Ta Xr minherit 2 Ta Fa inherit .It Fn sysdecode_msgctl_cmd Ta Xr msgctl 2 Ta Fa cmd .It Fn sysdecode_nfssvc_flags Ta Xr nfssvc 2 Ta Fa flags .It Fn sysdecode_pathconf_name Ta Xr pathconf 2 Ta Fa name .It Fn sysdecode_prio_which Ta Xr getpriority 2 Ta Fa which .It Fn sysdecode_procctl_cmd Ta Xr procctl 2 Ta Fa cmd .It Fn sysdecode_ptrace_request Ta Xr ptrace 2 Ta Fa request .It Fn sysdecode_rlimit Ta Xr getrlimit 2 Ta Fa resource .It Fn sysdecode_rtprio_function Ta Xr rtprio 2 Ta Fa function .It Fn sysdecode_getrusage_who Ta Xr getrusage 2 Ta Fa who .It Fn sysdecode_scheduler_policy Ta Xr sched_setscheduler 2 Ta Fa policy .It Fn sysdecode_semctl_cmd Ta Xr semctl 2 Ta Fa cmd .It Fn sysdecode_shmctl_cmd Ta Xr shmctl 2 Ta Fa cmd .It Fn sysdecode_shutdown_how Ta Xr shutdown 2 Ta Fa how .It Fn sysdecode_sigprocmask_how Ta Xr sigprocmask 2 Ta Fa how .It Fn sysdecode_sockopt_level Ta Xr getsockopt 2 Ta Fa level .It Fn sysdecode_sysarch_number Ta Xr sysarch 2 Ta Fa number .It Fn sysdecode_umtx_op Ta Xr _umtx_op 2 Ta Fa op .It Fn sysdecode_whence Ta Xr lseek 2 Ta Fa whence .El .Pp These functions decode signal-specific signal codes stored in the .Fa si_code field of the .Vt siginfo_t object associated with an instance of signal: .Bl -column "Fn sysdecode_sigchld_code" .It Sy Function Ta Sy Signal .It Fn sysdecode_sigbus_code Ta Dv SIGBUS .It Fn sysdecode_sigchld_code Ta Dv SIGCHLD .It Fn sysdecode_sigfpe_code Ta Dv SIGFPE .It Fn sysdecode_sigill_code Ta Dv SIGILL .It Fn sysdecode_sigsegv_code Ta Dv SIGSEGV .It Fn sysdecode_sigtrap_code Ta Dv SIGBTRAP .El .Pp Other functions decode the values described below: .Bl -tag -width "Fn sysdecode_sockaddr_family" .It Fn sysdecode_ipproto An IP protocol. .It Fn sysdecode_sctp_pr_policy A PR-SCTP policy. .It Fn sysdecode_signal A process signal. .It Fn sysdecode_sockaddr_family A socket address family. .It Fn sysdecode_socketdomain A socket domain. .It Fn sysdecode_vmresult The return value of a function in the virtual memory subsystem of the kernel indicating the status of the associated request. 
.El .Sh RETURN VALUES The .Nm functions return the name of a matching C macro or .Dv NULL if no matching C macro was found. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_mask 3 , .Xr sysdecode_sigcode 3 Index: head/lib/libsysdecode/sysdecode_fcntl_arg.3 =================================================================== --- head/lib/libsysdecode/sysdecode_fcntl_arg.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_fcntl_arg.3 (revision 344855) @@ -1,122 +1,121 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_fcntl_arg 3 .Os .Sh NAME .Nm sysdecode_fcntl_arg , .Nm sysdecode_fcntl_arg_p .Nd output description of fcntl argument .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft void .Fn sysdecode_fcntl_arg "FILE *fp" "int cmd" "uintptr_t arg" "int base" .Ft bool .Fn sysdecode_fcntl_arg_p "int cmd" .Sh DESCRIPTION The .Fn sysdecode_fcntl_arg function outputs a text description of the optional .Fa arg argument to .Xr fcntl 2 to the stream .Fa fp . The type and format of .Fa arg are determined by .Fa cmd : .Bl -column ".Dv F_SETLKW" "Vt struct flock *" .It Sy Command Ta Fa arg Sy Type Ta Sy Output Format .It .It Dv F_SETFD Ta Vt int Ta .Dq FD_CLOEXEC or the value of .Fa arg in the indicated .Fa base .Pq one of 8, 10, or 16 . .It .It Dv F_SETFL Ta Vt int Ta File flags as output by .Xr sysdecode_fcntl_fileflags 3 with any unknown or remaining bits output in hexadecimal. .It .It Dv F_GETLK Ta Vt struct flock * Ta .It Dv F_SETLK Ta Vt struct flock * Ta .It Dv F_SETLKW Ta Vt struct flock * Ta The value of .Fa arg using the .Dq %p conversion specification. .It .It Others Ta Vt int Ta The value of .Fa arg in the indicated .Fa base .Pq one of 8, 10, or 16 . .El .Pp The .Fn sysdecode_fcntl_arg_p function can be used to determine if a .Xr fcntl 2 command uses the optional third argument to .Xr fcntl 2 . The function returns .Dv true if .Fa cmd accepts a third argument to .Xr fcntl 2 and .Dv false if it does not. .Sh RETURN VALUES The .Nm sysdecode_fcntl_arg_p function returns .Dv true if .Fa cmd accepts a third argument to .Xr fcntl 2 and .Dv false if it does not. 
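.Sh EXAMPLES Based on the output formats described above, a statement such as .Pp .Dl sysdecode_fcntl_arg(stdout, F_SETFD, FD_CLOEXEC, 16); .Pp should output the text .Dq FD_CLOEXEC to standard output; this sketch assumes the standard .Dv FD_CLOEXEC definition from .In fcntl.h . For commands without a dedicated format, the value of .Fa arg is printed as a plain integer in the requested base.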
.Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_fcntl_cmd 3 , .Xr sysdecode_fcntl_fileflags 3 Index: head/lib/libsysdecode/sysdecode_ioctlname.3 =================================================================== --- head/lib/libsysdecode/sysdecode_ioctlname.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_ioctlname.3 (revision 344855) @@ -1,60 +1,59 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_ioctlname 3 .Os .Sh NAME .Nm sysdecode_ioctlname .Nd lookup name of device control command .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_ioctlname "unsigned long request" .Sh DESCRIPTION The .Fn sysdecode_ioctlname function returns the name of a device control request identified by .Fa request . A table of names is generated during the build of the .Nm sysdecode library from system headers that maps device control request values to the name of the corresponding C macro. .Sh RETURN VALUES The .Fn sysdecode_ioctlname function returns the name of a device control request if .Fa request is a known value; otherwise .Dv NULL . .Sh SEE ALSO .Xr sysdecode 3 Index: head/lib/libsysdecode/sysdecode_kevent.3 =================================================================== --- head/lib/libsysdecode/sysdecode_kevent.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_kevent.3 (revision 344855) @@ -1,126 +1,125 @@ .\" .\" Copyright (c) 2017 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_kevent 3 .Os .Sh NAME .Nm sysdecode_kevent , .Nm sysdecode_kevent_fflags , .Nm sysdecode_kevent_filter , .Nm sysdecode_kevent_flags .Nd output description of kevent structure fields .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft void .Fn sysdecode_kevent_fflags "FILE *fp" "short filter" "int fflags" "int base" .Ft bool .Fn sysdecode_kevent_flags "FILE *fp" "int flags" "int *rem" .Ft const char * .Fn sysdecode_kevent_filter "int filter" .Sh DESCRIPTION These functions provide text descriptions of .Vt struct kevent fields. .Pp The .Fn sysdecode_kevent_fflags function outputs a text description of the .Fa fflags member of a .Vt struct kevent to the stream .Fa fp . For the .Dv EVFILT_READ , .Dv EVFILT_WRITE , .Dv EVFILT_VNODE , .Dv EVFILT_PROC , .Dv EVFILT_PROCDESC , .Dv EVFILT_TIMER , and .Dv EVFILT_USER filters, .Fn sysdecode_kevent_fflags outputs a bitmask of filter-specific .Dv NOTE_* flags as documented in .Xr kevent 2 . For other values of .Fa filter , the value of .Fa fflags is output in the indicated .Fa base .Pq one of 8, 10, or 16 . .Pp The .Fn sysdecode_kevent_filter function returns a text description of the .Fa filter member of a .Vt struct kevent . .Dv NULL is returned if the .Fa filter value is unknown. .Pp The .Fn sysdecode_kevent_flags function outputs a text description of the .Fa flags member of a .Vt struct kevent to the stream .Fa fp . This function uses the same calling convention and formatting as the other functions described in .Xr sysdecode_mask 3 . .Sh RETURN VALUES The .Nm sysdecode_kevent_filter function returns the name of a filter or .Dv NULL if the filter value is unknown. .Pp The .Nm sysdecode_kevent_flags function returns .Dv true if any flags in the .Fa flags field were decoded and .Dv false if no flags were decoded. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_enum 3 , .Xr sysdecode_mask 3 Index: head/lib/libsysdecode/sysdecode_mask.3 =================================================================== --- head/lib/libsysdecode/sysdecode_mask.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_mask.3 (revision 344855) @@ -1,243 +1,242 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd January 16, 2018 .Dt sysdecode_mask 3 .Os .Sh NAME .Nm sysdecode_mask , .Nm sysdecode_accessmode , .Nm sysdecode_atflags , .Nm sysdecode_capfcntlrights , .Nm sysdecode_fcntl_fileflags , .Nm sysdecode_fileflags , .Nm sysdecode_filemode , .Nm sysdecode_flock_operation , .Nm sysdecode_mlockall_flags , .Nm sysdecode_mmap_flags , .Nm sysdecode_mmap_prot , .Nm sysdecode_mount_flags , .Nm sysdecode_msg_flags , .Nm sysdecode_msync_flags , .Nm sysdecode_open_flags , .Nm sysdecode_pipe2_flags , .Nm sysdecode_reboot_howto , .Nm sysdecode_rfork_flags , .Nm sysdecode_semget_flags , .Nm sysdecode_sendfile_flags , .Nm sysdecode_shmat_flags , .Nm sysdecode_sctp_nxt_flags , .Nm sysdecode_sctp_rcv_flags , .Nm sysdecode_sctp_snd_flags , .Nm sysdecode_socket_type , .Nm sysdecode_thr_create_flags , .Nm sysdecode_umtx_cvwait_flags , .Nm sysdecode_umtx_rwlock_flags , .Nm sysdecode_vmprot , .Nm sysdecode_wait4_options , .Nm sysdecode_wait6_options .Nd print name of various bitmask values .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft bool .Fn sysdecode_access_mode "FILE *fp" "int mode" "int *rem" .Ft bool .Fn sysdecode_atflags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_cap_fcntlrights "FILE *fp" "uint32_t rights" "uint32_t *rem" .Ft bool .Fn sysdecode_fcntl_fileflags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_fileflags "FILE *fp" "fflags_t flags" "fflags_t *rem" .Ft bool .Fn sysdecode_filemode "FILE *fp" "int mode" "int *rem" .Ft bool .Fn sysdecode_flock_operation "FILE *fp" "int operation" "int *rem" .Ft bool .Fn sysdecode_mlockall_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_mmap_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_mmap_prot "FILE *fp" "int prot" "int *rem" .Ft bool .Fn sysdecode_mount_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_msg_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_msync_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_open_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_pipe2_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_reboot_howto "FILE *fp" "int howto" "int *rem" .Ft bool .Fn sysdecode_rfork_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_nxt_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_rcv_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_snd_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_semget_flags "FILE 
*fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sendfile_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_shmat_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_socket_type "FILE *fp" "int type" "int *rem" .Ft bool .Fn sysdecode_thr_create_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_umtx_cvwait_flags "FILE *fp" "u_long flags" "u_long *rem" .Ft bool .Fn sysdecode_umtx_rwlock_flags "FILE *fp" "u_long flags" "u_long *rem" .Ft bool .Fn sysdecode_vmprot "FILE *fp" "int type" "int *rem" .Ft bool .Fn sysdecode_wait4_options "FILE *fp" "int options" "int *rem" .Ft bool .Fn sysdecode_wait6_options "FILE *fp" "int options" "int *rem" .Sh DESCRIPTION The .Nm functions are used to generate a text description of an integer value built from a mask of bitfields. The text description lists the C macros for field values joined by pipe .Sq | characters matching the format used in C source code. Most of the values decoded by these functions are passed as arguments to system calls, though some of these values are used internally in the kernel. .Pp Each function writes the text description to .Fa fp . The second argument should contain the integer value to be decoded. The .Fa rem argument is set to the value of any bits that were not decoded .Pq bit fields that do not have a corresponding C macro . .Fa rem may be set to .Dv NULL if the caller does not need this value. Each function returns .Dv true if any bit fields in the value were decoded and .Dv false if no bit fields were decoded. .Pp Most of these functions decode an argument passed to a system call: .Bl -column "Fn sysdecode_flock_operation" "Xr cap_fcntls_limit 2" .It Sy Function Ta Sy System Call Ta Sy Argument .It Fn sysdecode_access_mode Ta Xr access 2 Ta Fa mode .It Fn sysdecode_atflags Ta Xr chflagsat 2 , Xr fstatat 2 Ta Fa atflag , Fa flag .It Fn sysdecode_cap_fcntlrights Ta Xr cap_fcntls_limit 2 Ta Fa fcntlrights .It Fn sysdecode_fileflags Ta Xr chflags 2 Ta Fa flags .It Fn sysdecode_filemode Ta Xr chmod 2 , Xr open 2 Ta mode .It Fn sysdecode_flock_operation Ta Xr flock 2 Ta Fa operation .It Fn sysdecode_mlockall_flags Ta Xr mlockall 2 Ta Fa flags .It Fn sysdecode_mmap_flags Ta Xr mmap 2 Ta Fa flags .It Fn sysdecode_mmap_prot Ta Xr mmap 2 Ta Fa prot .It Fn sysdecode_mount_flags Ta Xr mount 2 Ta Fa flags .It Fn sysdecode_msg_flags Ta Xr recv 2 , Xr send 2 Ta Fa flags .It Fn sysdecode_msync_flags Ta Xr msync 2 Ta Fa flags .It Fn sysdecode_open_flags Ta Xr open 2 Ta Fa flags .It Fn sysdecode_pipe2_flags Ta Xr pipe2 Ta Fa flags .It Fn sysdecode_reboot_howto Ta Xr reboot 2 Ta Fa howto .It Fn sysdecode_rfork_flags Ta Xr rfork 2 Ta Fa flags .It Fn sysdecode_semget_flags Ta Xr semget 2 Ta Fa flags .It Fn sysdecode_sendfile_flags Ta Xr sendfile 2 Ta Fa flags .It Fn sysdecode_shmat_flags Ta Xr shmat 2 Ta Fa flags .It Fn sysdecode_socket_type Ta Xr socket 2 Ta Fa type .It Fn sysdecode_thr_create_flags Ta Xr thr_create 2 Ta Fa flags .It Fn sysdecode_wait4_options Ta Xr wait4 2 Ta Fa options .It Fn sysdecode_wait6_options Ta Xr wait6 2 Ta Fa options .El .Pp Other functions decode the values described below: .Bl -tag -width ".Fn sysdecode_umtx_cvwait_flags" .It Fn sysdecode_fcntl_fileflags The file flags used with the .Dv F_GETFL and .Dv F_SETFL .Xr fcntl 2 commands. .It Fn sysdecode_sctp_nxt_flags The .Fa nxt_flags member of a .Vt struct sctp_nxtinfo . .It Fn sysdecode_sctp_rcv_flags The .Fa rcv_flags member of a .Vt struct sctp_rcvinfo . 
.It Fn sysdecode_sctp_snd_flags The .Fa snd_flags member of a .Vt struct sctp_sndinfo . .It Fn sysdecode_umtx_cvwait_flags The .Fa val argument to .Xr _umtx_op 2 for .Dv UMTX_OP_CV_WAIT operations. .It Fn sysdecode_umtx_rwlock_flags The .Fa val argument to .Xr _umtx_op 2 for .Dv UMTX_OP_RW_RDLOCK operations. .It Fn sysdecode_vmprot The memory protection flags stored in .Vt vm_prot_t variables. .El .Sh RETURN VALUES The .Nm functions return .Dv true if any bit fields in the value were decoded and .Dv false if no bit fields were decoded. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_enum 3 Index: head/lib/libsysdecode/sysdecode_quotactl_cmd.3 =================================================================== --- head/lib/libsysdecode/sysdecode_quotactl_cmd.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_quotactl_cmd.3 (revision 344855) @@ -1,94 +1,93 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_quotactl_cmd 3 .Os .Sh NAME .Nm sysdecode_quotactl_cmd .Nd output name of quotactl command .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft bool .Fn sysdecode_quotactl_cmd "FILE *fp" "int cmd" .Sh DESCRIPTION The .Fn sysdecode_quotactl_cmd function outputs a text description of the .Fa cmd argument to .Xr quotactl 2 to the stream .Fa fp . The description is formatted as an invocation of the .Dv QCMD macro defined in the .In ufs/ufs/quota.h header. .Pp The function first computes the primary and secondary values used by .Dv QCMD to construct .Fa cmd . If the primary command value does not represent a known constant, .Fn sysdecode_quotactl_cmd does not generate any output and returns .Dv false . Otherwise, .Fn sysdecode_quotactl_cmd outputs text depicting an invocation of .Dv QCMD with the associated constants for the primary and secondary command values and returns .Dv true . If the secondary command value does not represent a known constant, its value is output as a hexadecimal integer. .Sh RETURN VALUES The .Nm sysdecode_quotactl_cmd function returns .Dv true if it outputs a description of .Fa cmd and .Dv false if it does not.
.Sh EXAMPLES The statement .Pp .Dl sysdecode_quotactl_cmd(stdout, QCMD(Q_GETQUOTA, USRQUOTA)); .Pp outputs the text .Dq QCMD(Q_GETQUOTA, USRQUOTA) to standard output. .Sh SEE ALSO .Xr sysdecode 3 Index: head/lib/libsysdecode/sysdecode_sigcode.3 =================================================================== --- head/lib/libsysdecode/sysdecode_sigcode.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_sigcode.3 (revision 344855) @@ -1,83 +1,82 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_sigcode 3 .Os .Sh NAME .Nm sysdecode_sigcode .Nd lookup name of signal code .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_sigcode "int sig" "int si_code" .Sh DESCRIPTION The .Fn sysdecode_sigcode function returns a text description of the .Fa si_code field of the .Vt siginfo_t object associated with an instance of signal .Fa sig . The text description contains the name of the C macro whose value matches .Fa si_code . General purpose signal codes such as .Dv SI_USER are handled as well as signal-specific codes for .Dv SIGBUS , .Dv SIGCHLD , .Dv SIGFPE , .Dv SIGILL , .Dv SIGSEGV and .Dv SIGTRAP . If .Fa si_code does not represent a known signal code, .Fn sysdecode_sigcode returns .Dv NULL . .Sh RETURN VALUES The .Fn sysdecode_sigcode function returns a pointer to a signal code description or .Dv NULL if .Fa si_code is not a known signal code. .Sh SEE ALSO .Xr sysdecode_sigbus_code 3 , .Xr sysdecode_sigchld_code 3 , .Xr sysdecode_sigfpe_code 3 , .Xr sysdecode_sigill_code 3 , .Xr sysdecode_sigsegv_code 3 , .Xr sysdecode_sigtrap_code 3 Index: head/lib/libsysdecode/sysdecode_socket_protocol.3 =================================================================== --- head/lib/libsysdecode/sysdecode_socket_protocol.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_socket_protocol.3 (revision 344855) @@ -1,54 +1,53 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1.
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd May 25, 2017 .Dt sysdecode_socket_protocol 3 .Os .Sh NAME .Nm sysdecode_socket_protocol .Nd lookup name of socket protocol .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_socket_protocol "int domain" "int protocol" .Sh DESCRIPTION The .Fn sysdecode_socket_protocol function returns a text description of the protocol passed in the .Fa protocol argument to .Xr socket 2 . .Fn sysdecode_socket_protocol takes the .Fa protocol as well as the .Fa domain to uniquely identify the protocol. Index: head/lib/libsysdecode/sysdecode_sockopt_name.3 =================================================================== --- head/lib/libsysdecode/sysdecode_sockopt_name.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_sockopt_name.3 (revision 344855) @@ -1,61 +1,60 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_sockopt_name 3 .Os .Sh NAME .Nm sysdecode_sockopt_name .Nd lookup name of socket option .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_sockopt_name "int level" "int optname" .Sh DESCRIPTION The .Fn sysdecode_sockopt_name function returns a text description of the socket option name passed in the .Fa optname argument to .Xr getsockopt 2 . .Fn sysdecode_sockopt_name takes the socket option .Fa level as well as the option name to uniquely identify the option. .Sh SEE ALSO .Xr sysdecode_sockopt_level 3 .Sh BUGS Socket option levels and names are protocol-specific. Both .Fn sysdecode_sockopt_level and .Fn sysdecode_sockopt_name should possibly accept the protocol family as an additional argument. Index: head/lib/libsysdecode/sysdecode_syscallnames.3 =================================================================== --- head/lib/libsysdecode/sysdecode_syscallnames.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_syscallnames.3 (revision 344855) @@ -1,71 +1,70 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_syscallnames 3 .Os .Sh NAME .Nm sysdecode_syscallnames .Nd lookup name of system calls .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_syscallnames "enum sysdecode_abi abi" "unsigned int code" .Sh DESCRIPTION This function returns a pointer to the name of a system call identified by .Fa code for the process ABI .Fa abi . If .Fa code specifies an unknown system call or .Fa abi is an unsupported ABI, .Nm returns .Dv NULL . .Pp For the list of supported ABIs, see .Xr sysdecode 3 . .Sh RETURN VALUES The .Nm function returns a pointer to a string on success or .Dv NULL if either .Fa code or .Fa ABI is invalid . 
.Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_abi_to_freebsd_errno 3 Index: head/lib/libsysdecode/sysdecode_utrace.3 =================================================================== --- head/lib/libsysdecode/sysdecode_utrace.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_utrace.3 (revision 344855) @@ -1,77 +1,76 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_utrace 3 .Os .Sh NAME .Nm sysdecode_utrace .Nd produce text description of a utrace record .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft int .Fn sysdecode_utrace "FILE *fp" "void *buf" "size_t len" "int decimal" .Sh DESCRIPTION The .Fn sysdecode_utrace function outputs a textual representation of a .Xr utrace 2 record identified by .Fa buf and .Fa len to the output stream .Fa fp . .Pp The function only outputs a representation for certain types of records. If a record is recognized, the function outputs the description and returns a non-zero value. If the record is not recognized, the function does not output anything and returns zero. The .Fn sysdecode_utrace function currently recognizes .Xr utrace 2 records generated by .Xr malloc 3 and .Xr rtld 1 . .Sh RETURN VALUES The .Fn sysdecode_utrace function returns a non-zero value if it recognizes a .Xr utrace 2 record; otherwise it returns zero. .Sh SEE ALSO .Xr utrace 2 , .Xr sysdecode 3 Index: head/lib/libutil/kinfo_getvmobject.3 =================================================================== --- head/lib/libutil/kinfo_getvmobject.3 (revision 344854) +++ head/lib/libutil/kinfo_getvmobject.3 (revision 344855) @@ -1,74 +1,73 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd May 27, 2015 .Dt KINFO_GETVMOBJECT 3 .Os .Sh NAME .Nm kinfo_getvmobject .Nd function for getting system-wide memory information .Sh LIBRARY .Lb libutil .Sh SYNOPSIS .In sys/types.h .In sys/user.h .In libutil.h .Ft struct kinfo_vmobject * .Fn kinfo_getvmobject "int *cntp" .Sh DESCRIPTION This function is used to obtain information about the objects using memory in the system. .Pp The .Ar cntp argument allows the caller to know how many records are returned. .Pp This function is a wrapper around the .Dq vm.objects .Xr sysctl 3 MIB. While the kernel returns a packed structure, this function expands the data into a fixed record format. .Sh RETURN VALUES On success the .Fn kinfo_getvmobject function returns a pointer to an array of .Vt struct kinfo_vmobject structures as defined by .In sys/user.h . The array is allocated by an internal call to .Xr malloc 3 and must be freed by the caller with a call to .Xr free 3 . On failure the .Fn kinfo_getvmobject function returns .Dv NULL . .Sh SEE ALSO .Xr free 3 , .Xr kinfo_getvmmap 3 , .Xr malloc 3 Index: head/sbin/hastd/refcnt.h =================================================================== --- head/sbin/hastd/refcnt.h (revision 344854) +++ head/sbin/hastd/refcnt.h (revision 344855) @@ -1,65 +1,64 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __REFCNT_H__ #define __REFCNT_H__ #include <machine/atomic.h> #include "pjdlog.h" typedef unsigned int refcnt_t; static __inline void refcnt_init(refcnt_t *count, unsigned int v) { *count = v; } static __inline void refcnt_acquire(refcnt_t *count) { atomic_add_acq_int(count, 1); } static __inline unsigned int refcnt_release(refcnt_t *count) { unsigned int old; /* XXX: Should this have a rel membar? */ old = atomic_fetchadd_int(count, -1); PJDLOG_ASSERT(old > 0); return (old - 1); } #endif /* ! __REFCNT_H__ */ Index: head/share/man/man3/sigevent.3 =================================================================== --- head/share/man/man3/sigevent.3 (revision 344854) +++ head/share/man/man3/sigevent.3 (revision 344855) @@ -1,127 +1,126 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd July 15, 2016 .Dt SIGEVENT 3 .Os .Sh NAME .Nm sigevent .Nd "asynchronous event notification" .Sh SYNOPSIS .In signal.h .Sh DESCRIPTION Some operations permit threads to request asynchronous notification of events via a .Vt struct sigevent structure.
This structure contains several fields that describe the requested notification: .Bl -column ".Vt void (*)(union sigval)" ".Va sigev_notify_kevent_flags" .It Sy Type Ta Sy Member Ta Sy Description .It Vt int Ta sigev_notify Ta notification method .It Vt int Ta sigev_signo Ta signal number .It Vt union sigval Ta sigev_value Ta signal value .It Vt int Ta sigev_notify_kqueue Ta .Xr kqueue 2 file descriptor .It Vt unsigned short Ta sigev_notify_kevent_flags Ta kevent flags .It Vt lwpid_t Ta sigev_notify_thread_id Ta LWP ID .It Vt void (*)(union sigval) Ta sigev_notify_function Ta callback function pointer .It Vt pthread_attr_t * Ta sigev_notify_attributes Ta callback thread attributes .El .Pp The .Va sigev_notify field specifies the notification method used when the event triggers: .Bl -tag -width ".Dv SIGEV_THREAD_ID" .It Dv SIGEV_NONE No notification is sent. .It Dv SIGEV_SIGNAL The signal .Va sigev_signo is queued as a real-time signal to the calling process. The value stored in .Va sigev_value will be present in the .Va si_value of the .Vt siginfo_t structure of the queued signal. .It Dv SIGEV_THREAD The notification function in .Va sigev_notify_function is called in a separate thread context. The thread is created with the attributes specified in .Va *sigev_notify_attributes . The value stored in .Va sigev_value is passed as the sole argument to .Va sigev_notify_function . If .Va sigev_notify_attributes is .Dv NULL , the thread is created with default attributes. .It Dv SIGEV_KEVENT A new kevent is posted to the kqueue .Va sigev_notify_kqueue . The .Va udata member of the kevent structure contains the value stored in .Va sigev_value . The meaning of other fields in the kevent are specific to the type of triggered event. .It Dv SIGEV_THREAD_ID The signal .Va sigev_signo is queued to the thread whose LWP ID is .Va sigev_notify_thread_id . The value stored in .Va sigev_value will be present in the .Va si_value of the .Vt siginfo_t structure of the queued signal. .El .Sh NOTES Note that programs wishing to use .Dv SIGEV_THREAD notifications must link against the .Lb librt . .Sh SEE ALSO .Xr aio_read 2 , .Xr mq_notify 2 , .Xr timer_create 2 , .Xr siginfo 3 .Sh STANDARDS The .Vt struct sigevent type conforms to .St -p1003.1-2004 . Index: head/share/man/man4/ktr.4 =================================================================== --- head/share/man/man4/ktr.4 (revision 344854) +++ head/share/man/man4/ktr.4 (revision 344855) @@ -1,211 +1,210 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 20, 2012 .Dt KTR 4 .Os .Sh NAME .Nm ktr .Nd kernel tracing facility .Sh SYNOPSIS .Cd options KTR .Cd options ALQ .Cd options KTR_ALQ .Cd options KTR_COMPILE=(KTR_LOCK|KTR_INTR|KTR_PROC) .Cd options KTR_CPUMASK=0x3 .Cd options KTR_ENTRIES=8192 .Cd options KTR_MASK=(KTR_INTR|KTR_PROC) .Cd options KTR_VERBOSE .Sh DESCRIPTION The .Nm facility allows kernel events to be logged while the kernel executes so that they can be examined later when debugging. The only mandatory option to enable .Nm is .Dq Li options KTR . .Pp The .Dv KTR_ENTRIES option sets the size of the buffer of events. The size of the buffer in the currently running kernel can be found via the sysctl .Va debug.ktr.entries . By default the buffer contains 1024 entries. .Ss Event Masking Event levels can be enabled or disabled to trim excessive and overly verbose logging. First, a mask of events is specified at compile time via the .Dv KTR_COMPILE option to limit which events are actually compiled into the kernel. The default value for this option is for all events to be enabled. .Pp Second, the actual events logged while the kernel runs can be further masked via the run time event mask. The .Dv KTR_MASK option sets the default value of the run time event mask. The runtime event mask can also be set by the .Xr loader 8 via the .Va debug.ktr.mask environment variable. It can also be examined and set after booting via the .Va debug.ktr.mask sysctl. By default the run time mask is set to block any tracing. The definitions of the event mask bits can be found in .In sys/ktr.h . .Pp Furthermore, there is a CPU event mask whose default value can be changed via the .Dv KTR_CPUMASK option. When two or more parameters to .Dv KTR_CPUMASK are used, it is important that they are not separated by whitespace. A CPU must have the bit corresponding to its logical id set in this bitmask for events that occur on it to be logged. This mask can be set by the .Xr loader 8 via the .Va debug.ktr.cpumask environment variable. It can also be examined and set after booting via the .Va debug.ktr.cpumask sysctl. By default, only CPUs specified in .Dv KTR_CPUMASK will log events. See .Pa sys/conf/NOTES for more information. .Ss Verbose Mode By default, events are only logged to the internal buffer for examination later, but if the verbose flag is set then they are dumped to the kernel console as well. This flag can also be set from the loader via the .Va debug.ktr.verbose environment variable, or it can be examined and set after booting via the .Va debug.ktr.verbose sysctl. If the flag is set to zero, which is the default, then verbose output is disabled. If the flag is set to one, then the contents of the log message and the CPU number are printed to the kernel console. If the flag is greater than one, then the filename and line number of the event are output to the console in addition to the log message and the CPU number. The .Dv KTR_VERBOSE option sets the flag to one.
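For orientation, events enter the trace buffer through the CTR* macros documented in ktr(9); a minimal kernel-code sketch, with an illustrative event class and message:

#include <sys/param.h>
#include <sys/ktr.h>

/*
 * The record reaches the trace buffer only if KTR_PROC is present in
 * both KTR_COMPILE and the run time mask (debug.ktr.mask), and the
 * current CPU is enabled in debug.ktr.cpumask.
 */
static void
trace_priority_change(int oldpri, int newpri)
{

	CTR2(KTR_PROC, "priority change: old %d new %d", oldpri, newpri);
}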
.Ss Examining the Events The KTR buffer can be examined from within .Xr ddb 4 via the .Ic show ktr Op Cm /vV command. This command displays the contents of the trace buffer one page at a time. At the .Dq Li --more-- prompt, the Enter key displays one more entry and prompts again. The spacebar displays another page of entries. Any other key quits. By default the timestamp, filename, and line number are not displayed with each log entry. If the .Cm /v modifier is specified, then they are displayed in addition to the normal output. If the .Cm /V modifier is specified, then just the timestamp is displayed in addition to the normal output. Note that the events are displayed in reverse chronological order. That is, the most recent events are displayed first. .Ss Logging ktr to Disk The .Dv KTR_ALQ option can be used to log .Nm entries to disk for post analysis using the .Xr ktrdump 8 utility. This option depends on the .Dv ALQ option. Due to the potentially high volume of trace messages the trace mask should be selected carefully. This feature is configured through a group of sysctls. .Bl -tag -width ".Va debug.ktr.alq_enable" .It Va debug.ktr.alq_file displays or sets the file that .Nm will log to. By default its value is .Pa /tmp/ktr.out . If the file name is changed while .Nm is enabled it will not take effect until the next invocation. .It Va debug.ktr.alq_enable enables logging of .Nm entries to disk if it is set to one. Setting this to 0 will terminate logging to disk and revert to logging to the normal ktr ring buffer. Data is not sent to the ring buffer while logging to disk. .It Va debug.ktr.alq_max is the maximum number of entries that will be recorded to disk, or 0 for infinite. This is helpful for limiting the number of particularly high frequency entries that are recorded. .It Va debug.ktr.alq_depth determines the number of entries in the write buffer. This is the buffer that holds entries before they are written to disk and defaults to the value of the .Dv KTR_ENTRIES option. .It Va debug.ktr.alq_failed records the number of times we failed to write an entry due to overflowing the write buffer. This may happen if the frequency of the logged .Nm messages outpaces the depth of the queue. .It Va debug.ktr.alq_cnt records the number of entries that have currently been written to disk. .El .Sh SEE ALSO .Xr ktrdump 8 , .Xr alq 9 , .Xr ktr 9 .Sh HISTORY The KTR kernel tracing facility first appeared in .Bsx 3.0 and was imported into .Fx 5.0 . Index: head/share/man/man4/witness.4 =================================================================== --- head/share/man/man4/witness.4 (revision 344854) +++ head/share/man/man4/witness.4 (revision 344855) @@ -1,188 +1,187 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 18, 2015 .Dt WITNESS 4 .Os .Sh NAME .Nm witness .Nd lock validation facility .Sh SYNOPSIS .Cd options WITNESS .Cd options WITNESS_COUNT .Cd options WITNESS_KDB .Cd options WITNESS_NO_VNODE .Cd options WITNESS_SKIPSPIN .Sh DESCRIPTION The .Nm module keeps track of the locks acquired and released by each thread. It also keeps track of the order in which locks are acquired with respect to each other. Each time a lock is acquired, .Nm uses these two lists to verify that a lock is not being acquired in the wrong order. If a lock order violation is detected, then a message is output to the kernel console or log detailing the locks involved and the locations in question. Witness can also be configured to drop into the kernel debugger when an order violation occurs. .Pp The .Nm code also checks various other conditions such as verifying that one does not recurse on a non-recursive lock, or attempt an upgrade on a shared lock held by another thread. If any of these checks fail, then the kernel will panic. .Pp The .Dv WITNESS_COUNT kernel option controls the maximum number of .Nm entries that are tracked in the kernel. The maximum number of entries can be queried via the .Va debug.witness.count sysctl. It can also be set from the .Xr loader 8 via the .Va debug.witness.count environment variable. .Pp The .Dv WITNESS_NO_VNODE kernel option tells .Nm to ignore locking issues between .Xr vnode 9 objects. .Pp The flag that controls whether or not the kernel debugger is entered when a lock order violation is detected can be set in a variety of ways. By default, the flag is off, but if the .Dv WITNESS_KDB kernel option is specified, then the flag will default to on. It can also be set from the .Xr loader 8 via the .Va debug.witness.kdb environment variable or after the kernel has booted via the .Va debug.witness.kdb sysctl. If the flag is set to zero, then the debugger will not be entered. If the flag is non-zero, then the debugger will be entered. .Pp The .Nm code can also be configured to skip all checks on spin mutexes. This flag defaults to off, but it can be turned on by specifying the .Dv WITNESS_SKIPSPIN kernel option. The flag can also be set via the .Xr loader 8 environment variable .Va debug.witness.skipspin . If the variable is set to a non-zero value, then spin mutexes are skipped. Once the kernel has booted, the status of this flag can be examined but not set via the read-only sysctl .Va debug.witness.skipspin . .Pp The sysctl .Va debug.witness.watch specifies the level of witness involvement in the system. A value of 1 specifies that witness is enabled. A value of 0 specifies that witness is disabled but can be enabled again. This setting maintains a small amount of overhead in the system. A value of -1 specifies that witness is disabled permanently and cannot be enabled again. The sysctl .Va debug.witness.watch can be set via .Xr loader 8 .
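To make the checks concrete, a hedged sketch of the kind of bug witness reports: two code paths acquiring the same pair of mutexes in opposite orders. The mtx functions come from mutex(9); the lock names and functions here are illustrative only:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx a_lock, b_lock;

static void
example_init(void)
{

	mtx_init(&a_lock, "a_lock", NULL, MTX_DEF);
	mtx_init(&b_lock, "b_lock", NULL, MTX_DEF);
}

static void
path_one(void)
{

	mtx_lock(&a_lock);
	mtx_lock(&b_lock);	/* witness records the order: a before b */
	mtx_unlock(&b_lock);
	mtx_unlock(&a_lock);
}

static void
path_two(void)
{

	mtx_lock(&b_lock);
	mtx_lock(&a_lock);	/* reversal: witness logs a lock order violation */
	mtx_unlock(&a_lock);
	mtx_unlock(&b_lock);
}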
.Pp The sysctl .Va debug.witness.output_channel specifies the output channel used to display warnings emitted by .Nm . The possible values are .Ql console , indicating that warnings are to be printed to the system console, .Ql log , indicating that warnings are to be logged via .Xr log 9 , and .Ql none . This sysctl can be set via .Xr loader 8 . .Pp The .Nm code also provides three extra .Xr ddb 4 commands if both .Nm and .Xr ddb 4 are compiled into the kernel: .Bl -ohang .It Ic show locks Op thread Outputs the list of locks held by a thread to the kernel console along with the filename and line number at which each lock was last acquired by the thread. The optional .Ar thread argument may be either a TID, PID, or pointer to a thread structure. If .Ar thread is not specified, then the locks held by the current thread are displayed. .It Ic show all locks Outputs the list of locks held by all threads in the system to the kernel console. .It Ic show witness Dump the current order list to the kernel console. The code first displays the lock order tree for all of the sleep locks. Then it displays the lock order tree for all of the spin locks. Finally, it displays a list of locks that have not yet been acquired. .El .Sh SEE ALSO .Xr ddb 4 , .Xr loader 8 , .Xr sysctl 8 , .Xr mutex 9 .Sh HISTORY The .Nm code first appeared in .Bsx 5.0 and was imported from there into .Fx 5.0 . Index: head/share/man/man9/BUS_GET_CPUS.9 =================================================================== --- head/share/man/man9/BUS_GET_CPUS.9 (revision 344854) +++ head/share/man/man9/BUS_GET_CPUS.9 (revision 344855) @@ -1,101 +1,100 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd March 1, 2016 .Dt BUS_GET_CPUS 9 .Os .Sh NAME .Nm BUS_GET_CPUS , .Nm bus_get_cpus .Nd "request a set of device-specific CPUs" .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .In sys/cpuset.h .Ft int .Fo BUS_GET_CPUS .Fa "device_t dev" "device_t child" "enum cpu_sets op" "size_t setsize" .Fa "cpuset_t *cpuset" .Fc .Ft int .Fo bus_get_cpus .Fa "device_t dev" "enum cpu_sets op" "size_t setsize" "cpuset_t *cpuset" .Fc .Sh DESCRIPTION The .Fn BUS_GET_CPUS method queries the parent bus device for a set of device-specific CPUs. The .Fa op argument specifies which set of CPUs to retrieve. If successful, the requested set of CPUs are returned in .Fa cpuset . The .Fa setsize argument specifies the size in bytes of the set passed in .Fa cpuset . .Pp .Fn BUS_GET_CPUS supports querying different types of CPU sets via the .Fa op argument. Not all set types are supported for every device. If a set type is not supported, .Fn BUS_GET_CPUS fails with .Er EINVAL . These set types are supported: .Bl -tag -width ".Dv LOCAL_CPUS" .It Dv LOCAL_CPUS The set of CPUs that are local to the device. If a device is closer to a specific memory domain in a non-uniform memory architecture system .Pq NUMA , this will return the set of CPUs in that memory domain. .It Dv INTR_CPUS The preferred set of CPUs that this device should use for device interrupts. This set type must be supported by all bus drivers. .El .Pp The .Fn bus_get_cpus function is a simple wrapper around .Fn BUS_GET_CPUS . .Sh RETURN VALUES Zero is returned on success, otherwise an appropriate error is returned. .Sh SEE ALSO .Xr cpuset 2 , .Xr BUS_BIND_INTR 9 , .Xr device 9 .Sh HISTORY The .Fn BUS_GET_CPUS method and .Fn bus_get_cpus function first appeared in .Fx 11.0 . Index: head/share/man/man9/BUS_RESCAN.9 =================================================================== --- head/share/man/man9/BUS_RESCAN.9 (revision 344854) +++ head/share/man/man9/BUS_RESCAN.9 (revision 344855) @@ -1,51 +1,50 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd April 27, 2016 .Dt BUS_RESCAN 9 .Os .Sh NAME .Nm BUS_RESCAN .Nd "rescan a bus checking for devices that have been added or removed" .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .Ft void .Fn BUS_RESCAN "device_t dev" .Sh DESCRIPTION The .Fn BUS_RESCAN method is called to request a rescan of the child devices on a bus device. The method should add any devices that have been added since the previous scan and remove devices that have been removed. This method is not required to re-examine existing devices to determine if their properties have changed. This method is also not required to propagate the rescan request to child devices. .Sh SEE ALSO .Xr device 9 Index: head/share/man/man9/atomic.9 =================================================================== --- head/share/man/man9/atomic.9 (revision 344854) +++ head/share/man/man9/atomic.9 (revision 344855) @@ -1,602 +1,601 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd December 22, 2017 .Dt ATOMIC 9 .Os .Sh NAME .Nm atomic_add , .Nm atomic_clear , .Nm atomic_cmpset , .Nm atomic_fcmpset , .Nm atomic_fetchadd , .Nm atomic_load , .Nm atomic_readandclear , .Nm atomic_set , .Nm atomic_subtract , .Nm atomic_store , .Nm atomic_thread_fence .Nd atomic operations .Sh SYNOPSIS .In sys/types.h .In machine/atomic.h .Ft void .Fn atomic_add_[acq_|rel_] "volatile *p" " v" .Ft void .Fn atomic_clear_[acq_|rel_] "volatile *p" " v" .Ft int .Fo atomic_cmpset_[acq_|rel_] .Fa "volatile *dst" .Fa " old" .Fa " new" .Fc .Ft int .Fo atomic_fcmpset_[acq_|rel_] .Fa "volatile *dst" .Fa " *old" .Fa " new" .Fc .Ft .Fn atomic_fetchadd_ "volatile *p" " v" .Ft .Fn atomic_load_[acq_] "volatile *p" .Ft .Fn atomic_readandclear_ "volatile *p" .Ft void .Fn atomic_set_[acq_|rel_] "volatile *p" " v" .Ft void .Fn atomic_subtract_[acq_|rel_] "volatile *p" " v" .Ft void .Fn atomic_store_[rel_] "volatile *p" " v" .Ft .Fn atomic_swap_ "volatile *p" " v" .Ft int .Fn atomic_testandclear_ "volatile *p" "u_int v" .Ft int .Fn atomic_testandset_ "volatile *p" "u_int v" .Ft void .Fn atomic_thread_fence_[acq|acq_rel|rel|seq_cst] "void" .Sh DESCRIPTION Atomic operations are commonly used to implement reference counts and as building blocks for synchronization primitives, such as mutexes. .Pp All of these operations are performed .Em atomically across multiple threads and in the presence of interrupts, meaning that they are performed in an indivisible manner from the perspective of concurrently running threads and interrupt handlers. .Pp On all architectures supported by .Fx , ordinary loads and stores of integers in cache-coherent memory are inherently atomic if the integer is naturally aligned and its size does not exceed the processor's word size. However, such loads and stores may be elided from the program by the compiler, whereas atomic operations are always performed. .Pp When atomic operations are performed on cache-coherent memory, all operations on the same location are totally ordered. .Pp When an atomic load is performed on a location in cache-coherent memory, it reads the entire value that was defined by the last atomic store to each byte of the location. An atomic load will never return a value out of thin air. When an atomic store is performed on a location, no other thread or interrupt handler will observe a .Em torn write , or partial modification of the location. .Pp Except as noted below, the semantics of these operations are almost identical to the semantics of similarly named C11 atomic operations. .Ss Types Most atomic operations act upon a specific .Fa type . That type is indicated in the function name. In contrast to C11 atomic operations, .Fx Ns 's atomic operations are performed on ordinary integer types. The available types are: .Pp .Bl -tag -offset indent -width short -compact .It Li int unsigned integer .It Li long unsigned long integer .It Li ptr unsigned integer the size of a pointer .It Li 32 unsigned 32-bit integer .It Li 64 unsigned 64-bit integer .El .Pp For example, the function to atomically add two integers is called .Fn atomic_add_int . .Pp Certain architectures also provide operations for types smaller than .Dq Li int . .Pp .Bl -tag -offset indent -width short -compact .It Li char unsigned character .It Li short unsigned short integer .It Li 8 unsigned 8-bit integer .It Li 16 unsigned 16-bit integer .El .Pp These types must not be used in machine-independent code. 
.Ss Acquire and Release Operations By default, a thread's accesses to different memory locations might not be performed in .Em program order , that is, the order in which the accesses appear in the source code. To optimize the program's execution, both the compiler and processor might reorder the thread's accesses. However, both ensure that their reordering of the accesses is not visible to the thread. Otherwise, the traditional memory model that is expected by single-threaded programs would be violated. Nonetheless, other threads in a multithreaded program, such as the .Fx kernel, might observe the reordering. Moreover, in some cases, such as the implementation of synchronization between threads, arbitrary reordering might result in the incorrect execution of the program. To constrain the reordering that both the compiler and processor might perform on a thread's accesses, a programmer can use atomic operations with .Em acquire and .Em release semantics. .Pp Atomic operations on memory have up to three variants. The first, or .Em relaxed variant, performs the operation without imposing any ordering constraints on accesses to other memory locations. This variant is the default. The second variant has acquire semantics, and the third variant has release semantics. .Pp When an atomic operation has acquire semantics, the operation must have completed before any subsequent load or store (by program order) is performed. Conversely, acquire semantics do not require that prior loads or stores have completed before the atomic operation is performed. An atomic operation can only have acquire semantics if it performs a load from memory. To denote acquire semantics, the suffix .Dq Li _acq is inserted into the function name immediately prior to the .Dq Li _ Ns Aq Fa type suffix. For example, to subtract two integers ensuring that the subtraction is completed before any subsequent loads and stores are performed, use .Fn atomic_subtract_acq_int . .Pp When an atomic operation has release semantics, all prior loads or stores (by program order) must have completed before the operation is performed. Conversely, release semantics do not require that the atomic operation must have completed before any subsequent load or store is performed. An atomic operation can only have release semantics if it performs a store to memory. To denote release semantics, the suffix .Dq Li _rel is inserted into the function name immediately prior to the .Dq Li _ Ns Aq Fa type suffix. For example, to add two long integers ensuring that all prior loads and stores are completed before the addition is performed, use .Fn atomic_add_rel_long . .Pp When a release operation by one thread .Em synchronizes with an acquire operation by another thread, usually meaning that the acquire operation reads the value written by the release operation, then the effects of all prior stores by the releasing thread must become visible to subsequent loads by the acquiring thread. Moreover, the effects of all stores (by other threads) that were visible to the releasing thread must also become visible to the acquiring thread. These rules only apply to the synchronizing threads. Other threads might observe these stores in a different order. .Pp In effect, atomic operations with acquire and release semantics establish one-way barriers to reordering that enable the implementations of synchronization primitives to express their ordering requirements without also imposing unnecessary ordering. 
For example, for a critical section guarded by a mutex, an acquire operation when the mutex is locked and a release operation when the mutex is unlocked will prevent any loads or stores from moving outside of the critical section. However, they will not prevent the compiler or processor from moving loads or stores into the critical section, which does not violate the semantics of a mutex. .Ss Thread Fence Operations Alternatively, a programmer can use atomic thread fence operations to constrain the reordering of accesses. In contrast to other atomic operations, fences do not, themselves, access memory. .Pp When a fence has acquire semantics, all prior loads (by program order) must have completed before any subsequent load or store is performed. Thus, an acquire fence is a two-way barrier for load operations. To denote acquire semantics, the suffix .Dq Li _acq is appended to the function name, for example, .Fn atomic_thread_fence_acq . .Pp When a fence has release semantics, all prior loads or stores (by program order) must have completed before any subsequent store operation is performed. Thus, a release fence is a two-way barrier for store operations. To denote release semantics, the suffix .Dq Li _rel is appended to the function name, for example, .Fn atomic_thread_fence_rel . .Pp Although .Fn atomic_thread_fence_acq_rel implements both acquire and release semantics, it is not a full barrier. For example, a store prior to the fence (in program order) may be completed after a load subsequent to the fence. In contrast, .Fn atomic_thread_fence_seq_cst implements a full barrier. Neither loads nor stores may cross this barrier in either direction. .Pp In C11, a release fence by one thread synchronizes with an acquire fence by another thread when an atomic load that is prior to the acquire fence (by program order) reads the value written by an atomic store that is subsequent to the release fence. In contrast, in FreeBSD, because of the atomicity of ordinary, naturally aligned loads and stores, fences can also be synchronized by ordinary loads and stores. This simplifies the implementation and use of some synchronization primitives in .Fx . .Pp Since neither a compiler nor a processor can foresee which (atomic) load will read the value written by an (atomic) store, the ordering constraints imposed by fences must be more restrictive than acquire loads and release stores. Essentially, this is why fences are two-way barriers. .Pp Although fences impose more restrictive ordering than acquire loads and release stores, by separating access from ordering, they can sometimes facilitate more efficient implementations of synchronization primitives. For example, they can be used to avoid executing a memory barrier until a memory access shows that some condition is satisfied. .Ss Multiple Processors In multiprocessor systems, the atomicity of the atomic operations on memory depends on support for cache coherence in the underlying architecture. In general, cache coherence on the default memory type, .Dv VM_MEMATTR_DEFAULT , is guaranteed by all architectures that are supported by .Fx . For example, cache coherence is guaranteed on write-back memory by the .Tn amd64 and .Tn i386 architectures. However, on some architectures, cache coherence might not be enabled on all memory types. To determine if cache coherence is enabled for a non-default memory type, consult the architecture's documentation. .Ss Semantics This section describes the semantics of each operation using a C-like notation.
.Bl -hang .It Fn atomic_add p v .Bd -literal -compact *p += v; .Ed .It Fn atomic_clear p v .Bd -literal -compact *p &= ~v; .Ed .It Fn atomic_cmpset dst old new .Bd -literal -compact if (*dst == old) { *dst = new; return (1); } else return (0); .Ed .El .Pp Some architectures do not implement the .Fn atomic_cmpset functions for the types .Dq Li char , .Dq Li short , .Dq Li 8 , and .Dq Li 16 . .Bl -hang .It Fn atomic_fcmpset dst *old new .El .Pp On architectures implementing .Em Compare And Swap operation in hardware, the functionality can be described as .Bd -literal -offset indent -compact if (*dst == *old) { *dst = new; return (1); } else { *old = *dst; return (0); } .Ed On architectures which provide the .Em Load Linked/Store Conditional primitive, the write to .Dv *dst might also fail for several reasons, the most important of which is a parallel write to the .Dv *dst cache line by another CPU. In this case, the .Fn atomic_fcmpset function also returns .Dv false , despite .Dl *old == *dst . .Pp Some architectures do not implement the .Fn atomic_fcmpset functions for the types .Dq Li char , .Dq Li short , .Dq Li 8 , and .Dq Li 16 . .Bl -hang .It Fn atomic_fetchadd p v .Bd -literal -compact tmp = *p; *p += v; return (tmp); .Ed .El .Pp The .Fn atomic_fetchadd functions are only implemented for the types .Dq Li int , .Dq Li long and .Dq Li 32 and do not have any variants with memory barriers at this time. .Bl -hang .It Fn atomic_load p .Bd -literal -compact return (*p); .Ed .It Fn atomic_readandclear p .Bd -literal -compact tmp = *p; *p = 0; return (tmp); .Ed .El .Pp The .Fn atomic_readandclear functions are not implemented for the types .Dq Li char , .Dq Li short , .Dq Li ptr , .Dq Li 8 , and .Dq Li 16 and do not have any variants with memory barriers at this time. .Bl -hang .It Fn atomic_set p v .Bd -literal -compact *p |= v; .Ed .It Fn atomic_subtract p v .Bd -literal -compact *p -= v; .Ed .It Fn atomic_store p v .Bd -literal -compact *p = v; .Ed .It Fn atomic_swap p v .Bd -literal -compact tmp = *p; *p = v; return (tmp); .Ed .El .Pp The .Fn atomic_swap functions are not implemented for the types .Dq Li char , .Dq Li short , .Dq Li ptr , .Dq Li 8 , and .Dq Li 16 and do not have any variants with memory barriers at this time. .Bl -hang .It Fn atomic_testandclear p v .Bd -literal -compact bit = 1 << (v % (sizeof(*p) * NBBY)); tmp = (*p & bit) != 0; *p &= ~bit; return (tmp); .Ed .El .Bl -hang .It Fn atomic_testandset p v .Bd -literal -compact bit = 1 << (v % (sizeof(*p) * NBBY)); tmp = (*p & bit) != 0; *p |= bit; return (tmp); .Ed .El .Pp The .Fn atomic_testandset and .Fn atomic_testandclear functions are only implemented for the types .Dq Li int , .Dq Li long and .Dq Li 32 and do not have any variants with memory barriers at this time. .Pp The type .Dq Li 64 is currently not implemented for any of the atomic operations on the .Tn arm , .Tn i386 , and .Tn powerpc architectures. .Sh RETURN VALUES The .Fn atomic_cmpset function returns the result of the compare operation. The .Fn atomic_fcmpset function returns .Dv true if the operation succeeded. Otherwise it returns .Dv false and sets .Va *old to the found value. The .Fn atomic_fetchadd , .Fn atomic_load , .Fn atomic_readandclear , and .Fn atomic_swap functions return the value at the specified address. The .Fn atomic_testandset and .Fn atomic_testandclear functions return the result of the test operation. .Sh EXAMPLES This example uses the .Fn atomic_cmpset_acq_ptr and .Fn atomic_set_ptr functions to obtain a sleep mutex and handle recursion.
Since the .Va mtx_lock member of a .Vt "struct mtx" is a pointer, the .Dq Li ptr type is used. .Bd -literal /* Try to obtain mtx_lock once. */ #define _obtain_lock(mp, tid) \\ atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) /* Get a sleep lock, deal with recursion inline. */ #define _get_sleep_lock(mp, tid, opts, file, line) do { \\ uintptr_t _tid = (uintptr_t)(tid); \\ \\ if (!_obtain_lock(mp, tid)) { \\ if (((mp)->mtx_lock & MTX_FLAGMASK) != _tid) \\ _mtx_lock_sleep((mp), _tid, (opts), (file), (line));\\ else { \\ atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSE); \\ (mp)->mtx_recurse++; \\ } \\ } \\ } while (0) .Ed .Sh HISTORY The .Fn atomic_add , .Fn atomic_clear , .Fn atomic_set , and .Fn atomic_subtract operations were introduced in .Fx 3.0 . Initially, these operations were defined on the types .Dq Li char , .Dq Li short , .Dq Li int , and .Dq Li long . .Pp The .Fn atomic_cmpset , .Fn atomic_load_acq , .Fn atomic_readandclear , and .Fn atomic_store_rel operations were added in .Fx 5.0 . Simultaneously, the acquire and release variants were introduced, and support was added for operation on the types .Dq Li 8 , .Dq Li 16 , .Dq Li 32 , .Dq Li 64 , and .Dq Li ptr . .Pp The .Fn atomic_fetchadd operation was added in .Fx 6.0 . .Pp The .Fn atomic_swap and .Fn atomic_testandset operations were added in .Fx 10.0 . .Pp The .Fn atomic_testandclear and .Fn atomic_thread_fence operations were added in .Fx 11.0 . .Pp The relaxed variants of .Fn atomic_load and .Fn atomic_store were added in .Fx 12.0 . Index: head/share/man/man9/bus_map_resource.9 =================================================================== --- head/share/man/man9/bus_map_resource.9 (revision 344854) +++ head/share/man/man9/bus_map_resource.9 (revision 344855) @@ -1,166 +1,165 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd February 5, 2018 .Dt BUS_MAP_RESOURCE 9 .Os .Sh NAME .Nm bus_map_resource , bus_unmap_resource , resource_init_map_request .Nd map or unmap an active resource .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .Pp .In machine/bus.h .In sys/rman.h .In machine/resource.h .Ft int .Fo bus_map_resource .Fa "device_t dev" "int type" "struct resource *r" .Fa "struct resource_map_request *args" "struct resource_map *map" .Fc .Ft int .Fo bus_unmap_resource .Fa "device_t dev" "int type" "struct resource *r" "struct resource_map *map" .Fc .Ft void .Fn resource_init_map_request "struct resource_map_request *args" .Sh DESCRIPTION These functions create or destroy a mapping of a previously activated resource. Mappings permit CPU access to the resource via the .Xr bus_space 9 API. .Pp The arguments are as follows: .Bl -tag -width indent .It Fa dev The device that owns the resource. .It Fa type The type of resource to map. It is one of: .Pp .Bl -tag -width ".Dv SYS_RES_MEMORY" -compact .It Dv SYS_RES_IOPORT for I/O ports .It Dv SYS_RES_MEMORY for I/O memory .El .It Fa r A pointer to the .Vt "struct resource" returned by .Xr bus_alloc_resource 9 . .It Fa args A set of optional properties to apply when creating a mapping. This argument can be set to .Dv NULL to request a mapping of the entire resource with the default properties. .It Fa map The resource mapping to create or destroy. .El .Ss Resource Mappings Resource mappings are described by a .Vt "struct resource_map" object. This structure contains a .Xr bus_space 9 tag and handle in the .Va r_bustag and .Va r_bushandle members that can be used for CPU access to the mapping. The structure also contains a .Va r_vaddr member which contains the virtual address of the mapping if one exists. .Pp The wrapper API for .Vt "struct resource" objects described in .Xr bus_activate_resource 9 can also be used with .Vt "struct resource_map" . For example, a pointer to a mapping object can be passed as the first argument to .Fn bus_read_4 . This wrapper API is preferred over using the .Va r_bustag and .Va r_bushandle members directly. .Ss Optional Mapping Properties The .Vt "struct resource_map_request" object passed in .Fa args can be used to specify optional properties of a mapping. The structure must be initialized by invoking .Fn resource_init_map_request . Properties are then specified by setting one or more of these members: .Bl -tag -width indent .It Va offset , length These two members specify a region of the resource to map. By default a mapping is created for the entire resource. The .Va offset is relative to the start of the resource. .It Va memattr Specifies a memory attribute to use when mapping the resource. By default memory mappings use the .Dv VM_MEMATTR_UNCACHEABLE attribute. .El .Sh RETURN VALUES Zero is returned on success, otherwise an error is returned. .Sh EXAMPLES This maps a PCI memory BAR with the write-combining memory attribute and reads the first 32-bit word: .Bd -literal struct resource *r; struct resource_map map; struct resource_map_request req; uint32_t val; int rid; rid = PCIR_BAR(0); r = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_UNMAPPED); resource_init_map_request(&req); req.memattr = VM_MEMATTR_WRITE_COMBINING; bus_map_resource(dev, SYS_RES_MEMORY, r, &req, &map); val = bus_read_4(&map, 0); .Ed .Sh SEE ALSO .Xr bus_activate_resource 9 , .Xr bus_alloc_resource 9 , .Xr bus_space 9 , .Xr device 9 , .Xr driver 9 .Sh AUTHORS This manual page was written by .An John Baldwin Aq Mt jhb@FreeBSD.org . 
Index: head/share/man/man9/critical_enter.9 =================================================================== --- head/share/man/man9/critical_enter.9 (revision 344854) +++ head/share/man/man9/critical_enter.9 (revision 344855) @@ -1,74 +1,73 @@ .\" Copyright (c) 2001,2002 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 5, 2005 .Dt CRITICAL_ENTER 9 .Os .Sh NAME .Nm critical_enter , .Nm critical_exit .Nd enter and exit a critical region .Sh SYNOPSIS .In sys/param.h .In sys/systm.h .Ft void .Fn critical_enter "void" .Ft void .Fn critical_exit "void" .Sh DESCRIPTION These functions are used to prevent preemption in a critical region of code. All that is guaranteed is that the thread currently executing on a CPU will not be preempted. Specifically, a thread in a critical region will not migrate to another CPU while it is in a critical region. The current CPU may still trigger faults and exceptions during a critical section; however, these faults are usually fatal. .Pp The .Fn critical_enter and .Fn critical_exit functions manage a per-thread counter to handle nested critical sections. If a thread is made runnable that would normally preempt the current thread while the current thread is in a critical section, then the preemption will be deferred until the current thread exits the outermost critical section. .Pp Note that these functions are not required to provide any inter-CPU synchronization, data protection, or memory ordering guarantees and thus should .Em not be used to protect shared data structures. .Pp These functions should be used with care as an infinite loop within a critical region will deadlock the CPU. Also, they should not be interlocked with operations on mutexes, sx locks, semaphores, or other synchronization primitives. One exception to this is that spin mutexes include a critical section, so in certain cases critical sections may be interlocked with spin mutexes. .Sh HISTORY These functions were introduced in .Fx 5.0 . 
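.Pp
A minimal usage sketch follows; the per-CPU counter array is hypothetical, chosen only to illustrate the no-preemption, no-migration guarantee, and it provides no synchronization against other CPUs:
.Bd -literal
static u_long pinned_events[MAXCPU];

critical_enter();
/* Safe: this thread cannot be preempted or migrate off this CPU. */
pinned_events[PCPU_GET(cpuid)]++;
critical_exit();
.Ed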
Index: head/share/man/man9/ithread.9 =================================================================== --- head/share/man/man9/ithread.9 (revision 344854) +++ head/share/man/man9/ithread.9 (revision 344855) @@ -1,351 +1,350 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd August 25, 2006 .Dt ITHREAD 9 .Os .Sh NAME .Nm ithread_add_handler , .Nm ithread_create , .Nm ithread_destroy , .Nm ithread_priority , .Nm ithread_remove_handler , .Nm ithread_schedule .Nd kernel interrupt threads .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .In sys/interrupt.h .Ft int .Fo ithread_add_handler .Fa "struct ithd *ithread" .Fa "const char *name" .Fa "driver_intr_t handler" .Fa "void *arg" .Fa "u_char pri" .Fa "enum intr_type flags" .Fa "void **cookiep" .Fc .Ft int .Fo ithread_create .Fa "struct ithd **ithread" .Fa "int vector" .Fa "int flags" .Fa "void (*disable)(int)" .Fa "void (*enable)(int)" .Fa "const char *fmt" .Fa "..." .Fc .Ft int .Fn ithread_destroy "struct ithd *ithread" .Ft u_char .Fn ithread_priority "enum intr_type flags" .Ft int .Fn ithread_remove_handler "void *cookie" .Ft int .Fn ithread_schedule "struct ithd *ithread" "int do_switch" .Sh DESCRIPTION Interrupt threads are kernel threads that run a list of handlers when triggered by either a hardware or software interrupt. Each interrupt handler has a name, handler function, handler argument, priority, and various flags. Each interrupt thread maintains a list of handlers sorted by priority. This results in higher priority handlers being executed prior to lower priority handlers. Each thread assumes the priority of its highest priority handler for its process priority, or .Dv PRIO_MAX if it has no handlers. Interrupt threads are also associated with a single interrupt source, represented as a vector number. .Pp The .Fn ithread_create function creates a new interrupt thread. The .Fa ithread argument points to a .Vt struct ithd pointer that will point to the newly created thread upon success. The .Fa vector argument specifies the interrupt source to associate this thread with. The .Fa flags argument is a mask of properties of this thread.
The only flag currently valid for .Fn ithread_create is .Dv IT_SOFT , which specifies that this interrupt thread is a software interrupt. The .Fa enable and .Fa disable arguments specify optional functions used to enable and disable this interrupt thread's interrupt source. The functions receive the vector corresponding to the thread's interrupt source as their only argument. The remaining arguments form a .Xr printf 9 argument list that is used to build the base name of the new ithread. The full name of an interrupt thread is formed by concatenating the base name of an interrupt thread with the names of all of its interrupt handlers. .Pp The .Fn ithread_destroy function destroys a previously created interrupt thread by releasing its resources and arranging for the backing kernel thread to terminate. An interrupt thread can only be destroyed if it has no handlers remaining. .Pp The .Fn ithread_add_handler function adds a new handler to an existing interrupt thread specified by .Fa ithread . The .Fa name argument specifies a name for this handler. The .Fa handler and .Fa arg arguments provide the function to execute for this handler and an argument to pass to it. The .Fa pri argument specifies the priority of this handler and is used both to sort it in relation to the other handlers for this thread and to specify the priority of the backing kernel thread. The .Fa flags argument can be used to specify properties of this handler as defined in .In sys/bus.h . If .Fa cookiep is not .Dv NULL , then it will be assigned a cookie that can be used later to remove this handler. .Pp The .Fn ithread_remove_handler function removes a handler from an interrupt thread. The .Fa cookie argument specifies the handler to remove from its thread. .Pp The .Fn ithread_schedule function schedules an interrupt thread to run. If the .Fa do_switch argument is non-zero and the interrupt thread is idle, then a context switch will be forced after putting the interrupt thread on the run queue. .Pp The .Fn ithread_priority function translates the .Dv INTR_TYPE_* interrupt flags into interrupt handler priorities. .Pp The interrupt flags not related to the type of a particular interrupt .Pq Dv INTR_TYPE_* can be used to specify additional properties of both hardware and software interrupt handlers. The .Dv INTR_EXCL flag specifies that this handler cannot share an interrupt thread with another handler. The .Dv INTR_MPSAFE flag specifies that this handler is MP safe in that it does not need the Giant mutex to be held while it is executed. The .Dv INTR_ENTROPY flag specifies that the interrupt source this handler is tied to is a good source of entropy, and thus that entropy should be gathered when an interrupt from the handler's source triggers. Presently, the .Dv INTR_ENTROPY flag is not valid for software interrupt handlers. .Pp It is not permitted to sleep in an interrupt thread; hence, any memory or zone allocations in an interrupt thread should be specified with the .Dv M_NOWAIT flag set. Any allocation errors must then be handled. .Sh RETURN VALUES The .Fn ithread_add_handler , .Fn ithread_create , .Fn ithread_destroy , .Fn ithread_remove_handler , and .Fn ithread_schedule functions return zero on success and non-zero on failure. The .Fn ithread_priority function returns a process priority corresponding to the passed-in interrupt flags. .Sh EXAMPLES The .Fn swi_add function demonstrates the use of .Fn ithread_create and .Fn ithread_add_handler .
.Bd -literal -offset indent int swi_add(struct ithd **ithdp, const char *name, driver_intr_t handler, void *arg, int pri, enum intr_type flags, void **cookiep) { struct proc *p; struct ithd *ithd; int error; if (flags & INTR_ENTROPY) return (EINVAL); ithd = (ithdp != NULL) ? *ithdp : NULL; if (ithd != NULL) { if ((ithd->it_flags & IT_SOFT) == 0) return(EINVAL); } else { error = ithread_create(&ithd, pri, IT_SOFT, NULL, NULL, "swi%d:", pri); if (error) return (error); if (ithdp != NULL) *ithdp = ithd; } return (ithread_add_handler(ithd, name, handler, arg, pri + PI_SOFT, flags, cookiep)); } .Ed .Sh ERRORS The .Fn ithread_add_handler function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Any of the .Fa ithread , .Fa handler , or .Fa name arguments are .Dv NULL . .It Bq Er EINVAL The .Dv INTR_EXCL flag is specified and the interrupt thread .Fa ithread already has at least one handler, or the interrupt thread .Fa ithread already has an exclusive handler. .It Bq Er ENOMEM Could not allocate needed memory for this handler. .El .Pp The .Fn ithread_create function will fail if: .Bl -tag -width Er .It Bq Er EAGAIN The system-imposed limit on the total number of processes under execution would be exceeded. The limit is given by the .Xr sysctl 3 MIB variable .Dv KERN_MAXPROC . .It Bq Er EINVAL A flag other than .Dv IT_SOFT was specified in the .Fa flags parameter. .It Bq Er ENOMEM Could not allocate needed memory for this interrupt thread. .El .Pp The .Fn ithread_destroy function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa ithread argument is .Dv NULL . .It Bq Er EINVAL The interrupt thread pointed to by .Fa ithread has at least one handler. .El .Pp The .Fn ithread_remove_handler function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa cookie argument is .Dv NULL . .El .Pp The .Fn ithread_schedule function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa ithread argument is .Dv NULL . .It Bq Er EINVAL The interrupt thread pointed to by .Fa ithread has no interrupt handlers. .El .Sh SEE ALSO .Xr kthread 9 , .Xr malloc 9 , .Xr swi 9 , .Xr uma 9 .Sh HISTORY Interrupt threads and their corresponding API first appeared in .Fx 5.0 . .Sh BUGS Currently .Vt struct ithd represents both an interrupt source and an interrupt thread. There should be a separate .Vt struct isrc that contains a vector number, enable and disable functions, etc.\& that an ithread holds a reference to. Index: head/share/man/man9/ktr.9 =================================================================== --- head/share/man/man9/ktr.9 (revision 344854) +++ head/share/man/man9/ktr.9 (revision 344855) @@ -1,162 +1,161 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 30, 2008 .Dt KTR 9 .Os .Sh NAME .Nm CTR0 , CTR1 , CTR2 , CTR3 , CTR4 , CTR5 , CTR6 .Nd kernel tracing facility .Sh SYNOPSIS .In sys/param.h .In sys/ktr.h .Vt "extern int ktr_cpumask" ; .Vt "extern int ktr_entries" ; .Vt "extern int ktr_extend" ; .Vt "extern int ktr_mask" ; .Vt "extern int ktr_verbose" ; .Vt "extern struct ktr_entry ktr_buf[]" ; .Ft void .Fn CTR0 "u_int mask" "char *format" .Ft void .Fn CTR1 "u_int mask" "char *format" "arg1" .Ft void .Fn CTR2 "u_int mask" "char *format" "arg1" "arg2" .Ft void .Fn CTR3 "u_int mask" "char *format" "arg1" "arg2" "arg3" .Ft void .Fn CTR4 "u_int mask" "char *format" "arg1" "arg2" "arg3" "arg4" .Ft void .Fn CTR5 "u_int mask" "char *format" "arg1" "arg2" "arg3" "arg4" "arg5" .Ft void .Fn CTR6 "u_int mask" "char *format" "arg1" "arg2" "arg3" "arg4" "arg5" "arg6" .Sh DESCRIPTION KTR provides a circular buffer of events that can be logged in a .Xr printf 9 style fashion. These events can then be dumped with .Xr ddb 4 , .Xr gdb 1 , or .Xr ktrdump 8 . .Pp Events are created and logged in the kernel via the .Dv CTR Ns Ar x macros. The first parameter is a mask of event types .Pq Dv KTR_* defined in .In sys/ktr.h . The event will be logged only if any of the event types specified in .Fa mask are enabled in the global event mask stored in .Va ktr_mask . The .Fa format argument is a .Xr printf 9 style format string used to build the text of the event log message. Following the .Fa format string are zero to six arguments referenced by .Fa format . Each event is logged with the file name and source line number of the originating CTR call and a timestamp, in addition to the log message. .Pp The event is stored in the circular buffer with the supplied arguments as-is, and formatting is done at dump time. Do not use pointers to objects with limited lifetimes, such as strings, because the pointer may no longer be valid when the buffer is printed. .Pp Note that the different macros differ only in the number of arguments each one takes, as indicated by its name. .Pp The .Va ktr_entries variable contains the number of entries in the .Va ktr_buf array. These variables are mostly useful for post-mortem crash dump tools to locate the base of the circular trace buffer and its length. .Pp The .Va ktr_mask variable contains the run time mask of events to log. .Pp The CPU event mask is stored in the .Va ktr_cpumask variable. .Pp The .Va ktr_verbose variable stores the verbose flag that controls whether events are logged to the console in addition to the event buffer. .Sh EXAMPLES This example demonstrates the use of tracepoints at the .Dv KTR_PROC logging level. .Bd -literal void mi_switch() { ... /* * Pick a new current process and record its start time. */ ... CTR2(KTR_PROC, "mi_switch: old proc %p (pid %d)", p, p->p_pid); ... cpu_switch(); ... CTR2(KTR_PROC, "mi_switch: new proc %p (pid %d)", p, p->p_pid); ...
} .Ed .Sh SEE ALSO .Xr ktr 4 , .Xr ktrdump 8 .Sh HISTORY The KTR kernel tracing facility first appeared in .Bsx 3.0 and was imported into .Fx 5.0 . .Sh BUGS Currently, there is one global buffer shared among all CPUs. It might be profitable at some point in time to use per-CPU buffers instead so that if one CPU halts or starts spinning, then the log messages it emitted just prior to halting or spinning will not be drowned out by events from the other CPUs. .Pp The arguments given to the .Fn CTRx macros are stored as .Vt u_long , so do not pass arguments larger than the size of a .Vt u_long . For example, passing 64-bit arguments on 32-bit architectures will give incorrect results. Index: head/share/man/man9/runqueue.9 =================================================================== --- head/share/man/man9/runqueue.9 (revision 344854) +++ head/share/man/man9/runqueue.9 (revision 344855) @@ -1,137 +1,136 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd August 15, 2010 .Dt RUNQUEUE 9 .Os .Sh NAME .Nm choosethread , .Nm procrunnable , .Nm remrunqueue , .Nm setrunqueue .Nd manage the queue of runnable processes .Sh SYNOPSIS .In sys/param.h .In sys/proc.h .Vt "extern struct rq itqueues[]" ; .Vt "extern struct rq rtqueues[]" ; .Vt "extern struct rq queues[]" ; .Vt "extern struct rq idqueues[]" ; .Ft struct thread * .Fn choosethread "void" .Ft int .Fn procrunnable "void" .Ft void .Fn remrunqueue "struct thread *td" .Ft void .Fn setrunqueue "struct thread *td" .Sh DESCRIPTION The run queue consists of four priority queues: .Va itqueues for interrupt threads, .Va rtqueues for realtime priority processes, .Va queues for time sharing processes, and .Va idqueues for idle priority processes. Each priority queue consists of an array of .Dv NQS queue header structures. Each queue header identifies a list of runnable processes of equal priority. Each queue also has a single word that contains a bit mask identifying non-empty queues to assist in selecting a process quickly. These are named .Va itqueuebits , .Va rtqueuebits , .Va queuebits , and .Va idqueuebits . The run queues are protected by the .Va sched_lock mutex. .Pp .Fn procrunnable returns zero if there are no runnable processes other than the idle process.
If there is at least one runnable process other than the idle process, it will return a non-zero value. Note that the .Va sched_lock mutex does .Em not need to be held when this function is called. There is a small race window where one CPU may place a process on the run queue when there are currently no other runnable processes while another CPU is calling this function. In that case the second CPU will simply travel through the idle loop one additional time before noticing that there is a runnable process. This works because idle CPUs are not halted in SMP systems. If idle CPUs are halted in SMP systems, then this race condition might have more serious repercussions in the losing case, and .Fn procrunnable may have to require that the .Va sched_lock mutex be acquired. .Pp .Fn choosethread returns the highest priority runnable thread. If there are no runnable threads, then the idle thread is returned. This function is called by .Fn cpu_switch and .Fn cpu_throw to determine which thread to switch to. .Fn choosethread must be called with the .Va sched_lock mutex held. .Pp .Fn setrunqueue adds the thread .Fa td to the tail of the appropriate queue in the proper priority queue. The thread must be runnable, i.e.\& .Va p_stat must be set to .Dv SRUN . This function must be called with the .Va sched_lock mutex held. .Pp .Fn remrunqueue removes thread .Fa td from its run queue. If .Fa td is not on a run queue, then the kernel will .Xr panic 9 . This function must be called with the .Va sched_lock mutex held. .Sh SEE ALSO .Xr cpu_switch 9 , .Xr scheduler 9 , .Xr sleepqueue 9 Index: head/share/man/man9/scheduler.9 =================================================================== --- head/share/man/man9/scheduler.9 (revision 344854) +++ head/share/man/man9/scheduler.9 (revision 344855) @@ -1,276 +1,275 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd November 3, 2000 .Dt SCHEDULER 9 .Os .Sh NAME .Nm curpriority_cmp , .Nm maybe_resched , .Nm resetpriority , .Nm roundrobin , .Nm roundrobin_interval , .Nm sched_setup , .Nm schedclock , .Nm schedcpu , .Nm setrunnable , .Nm updatepri .Nd perform round-robin scheduling of runnable processes .Sh SYNOPSIS .In sys/param.h .In sys/proc.h .Ft int .Fn curpriority_cmp "struct proc *p" .Ft void .Fn maybe_resched "struct thread *td" .Ft void .Fn propagate_priority "struct proc *p" .Ft void .Fn resetpriority "struct ksegrp *kg" .Ft void .Fn roundrobin "void *arg" .Ft int .Fn roundrobin_interval "void" .Ft void .Fn sched_setup "void *dummy" .Ft void .Fn schedclock "struct thread *td" .Ft void .Fn schedcpu "void *arg" .Ft void .Fn setrunnable "struct thread *td" .Ft void .Fn updatepri "struct thread *td" .Sh DESCRIPTION Each process has three different priorities stored in .Vt "struct proc" : .Va p_usrpri , .Va p_nativepri , and .Va p_priority . .Pp The .Va p_usrpri member is the user priority of the process calculated from a process' estimated CPU time and nice level. .Pp The .Va p_nativepri member is the saved priority used by .Fn propagate_priority . When a process obtains a mutex, its priority is saved in .Va p_nativepri . While it holds the mutex, the process's priority may be bumped by another process that blocks on the mutex. When the process releases the mutex, then its priority is restored to the priority saved in .Va p_nativepri . .Pp The .Va p_priority member is the actual priority of the process and is used to determine what .Xr runqueue 9 it runs on, for example. .Pp The .Fn curpriority_cmp function compares the cached priority of the currently running process with process .Fa p . If the currently running process has a higher priority, then it will return a value less than zero. If the current process has a lower priority, then it will return a value greater than zero. If the current process has the same priority as .Fa p , then .Fn curpriority_cmp will return zero. The cached priority of the currently running process is updated when a process resumes from .Xr tsleep 9 or returns to userland in .Fn userret and is stored in the private variable .Va curpriority . .Pp The .Fn maybe_resched function compares the priorities of the current thread and .Fa td . If .Fa td has a higher priority than the current thread, then a context switch is needed, and .Dv KEF_NEEDRESCHED is set. .Pp The .Fn propagate_priority looks at the process that owns the mutex .Fa p is blocked on. That process's priority is bumped to the priority of .Fa p if needed. If the process is currently running, then the function returns. If the process is on a .Xr runqueue 9 , then the process is moved to the appropriate .Xr runqueue 9 for its new priority. If the process is blocked on a mutex, its position in the list of processes blocked on the mutex in question is updated to reflect its new priority. Then, the function repeats the procedure using the process that owns the mutex just encountered. Note that a process's priorities are only bumped to the priority of the original process .Fa p , not to the priority of the previously encountered process. .Pp The .Fn resetpriority function recomputes the user priority of the ksegrp .Fa kg (stored in .Va kg_user_pri ) and calls .Fn maybe_resched to force a reschedule of each thread in the group if needed. .Pp The .Fn roundrobin function is used as a .Xr timeout 9 function to force a reschedule every .Va sched_quantum ticks. 
.Pp The .Fn roundrobin_interval function simply returns the number of clock ticks in between reschedules triggered by .Fn roundrobin . Thus, all it does is return the current value of .Va sched_quantum . .Pp The .Fn sched_setup function is a .Xr SYSINIT 9 that is called to start the callout-driven scheduler functions. It just calls the .Fn roundrobin and .Fn schedcpu functions for the first time. After the initial call, the two functions keep themselves running by registering their callout events again each time they complete. .Pp The .Fn schedclock function is called by .Fn statclock to adjust the priority of the currently running thread's ksegrp. It updates the group's estimated CPU time and then adjusts the priority via .Fn resetpriority . .Pp The .Fn schedcpu function updates all process priorities. First, it updates statistics that track how long processes have been in various process states. Second, it updates the estimated CPU time for the current process such that about 90% of the CPU usage is forgotten in 5 * load average seconds. For example, if the load average is 2.00, then at least 90% of the estimated CPU time for the process should be based on the amount of CPU time the process has had in the last 10 seconds. It then recomputes the priority of the process and moves it to the appropriate .Xr runqueue 9 if necessary. Third, it updates the %CPU estimate used by utilities such as .Xr ps 1 and .Xr top 1 so that 95% of the CPU usage is forgotten in 60 seconds. Once all process priorities have been updated, .Fn schedcpu calls .Fn vmmeter to update various other statistics including the load average. Finally, it schedules itself to run again in .Va hz clock ticks. .Pp The .Fn setrunnable function is used to change a process's state to be runnable. The process is placed on a .Xr runqueue 9 if needed, and the swapper process is woken up and told to swap the process in if the process is swapped out. If the process has been asleep for at least one run of .Fn schedcpu , then .Fn updatepri is used to adjust the priority of the process. .Pp The .Fn updatepri function is used to adjust the priority of a process that has been asleep. It retroactively decays the estimated CPU time of the process for each .Fn schedcpu event that occurred while the process was asleep. Finally, it calls .Fn resetpriority to adjust the priority of the process. .Sh SEE ALSO .Xr mi_switch 9 , .Xr runqueue 9 , .Xr sleepqueue 9 , .Xr tsleep 9 .Sh BUGS The .Va curpriority variable really should be per-CPU. In addition, .Fn maybe_resched should compare the priority of .Fa td with that of each CPU, and then send an IPI to the processor with the lowest priority to trigger a reschedule if needed. .Pp Priority propagation is broken and is thus disabled by default. The .Va p_nativepri variable is only updated if a process does not obtain a sleep mutex on the first try. Also, if a process obtains more than one sleep mutex in this manner, and had its priority bumped in between, then .Va p_nativepri will be clobbered. Index: head/share/man/man9/sleepqueue.9 =================================================================== --- head/share/man/man9/sleepqueue.9 (revision 344854) +++ head/share/man/man9/sleepqueue.9 (revision 344855) @@ -1,390 +1,389 @@ .\" Copyright (c) 2000-2004 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1.
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd September 22, 2014 .Dt SLEEPQUEUE 9 .Os .Sh NAME .Nm init_sleepqueues , .Nm sleepq_abort , .Nm sleepq_add , .Nm sleepq_alloc , .Nm sleepq_broadcast , .Nm sleepq_free , .Nm sleepq_lock , .Nm sleepq_lookup , .Nm sleepq_release , .Nm sleepq_remove , .Nm sleepq_signal , .Nm sleepq_set_timeout , .Nm sleepq_set_timeout_sbt , .Nm sleepq_sleepcnt , .Nm sleepq_timedwait , .Nm sleepq_timedwait_sig , .Nm sleepq_type , .Nm sleepq_wait , .Nm sleepq_wait_sig .Nd manage the queues of sleeping threads .Sh SYNOPSIS .In sys/param.h .In sys/sleepqueue.h .Ft void .Fn init_sleepqueues "void" .Ft int .Fn sleepq_abort "struct thread *td" .Ft void .Fn sleepq_add "void *wchan" "struct lock_object *lock" "const char *wmesg" "int flags" "int queue" .Ft struct sleepqueue * .Fn sleepq_alloc "void" .Ft int .Fn sleepq_broadcast "void *wchan" "int flags" "int pri" "int queue" .Ft void .Fn sleepq_free "struct sleepqueue *sq" .Ft struct sleepqueue * .Fn sleepq_lookup "void *wchan" .Ft void .Fn sleepq_lock "void *wchan" .Ft void .Fn sleepq_release "void *wchan" .Ft void .Fn sleepq_remove "struct thread *td" "void *wchan" .Ft int .Fn sleepq_signal "void *wchan" "int flags" "int pri" "int queue" .Ft void .Fn sleepq_set_timeout "void *wchan" "int timo" .Ft void .Fn sleepq_set_timeout_sbt "void *wchan" "sbintime_t sbt" \ "sbintime_t pr" "int flags" .Ft u_int .Fn sleepq_sleepcnt "void *wchan" "int queue" .Ft int .Fn sleepq_timedwait "void *wchan" "int pri" .Ft int .Fn sleepq_timedwait_sig "void *wchan" "int pri" .Ft int .Fn sleepq_type "void *wchan" .Ft void .Fn sleepq_wait "void *wchan" "int pri" .Ft int .Fn sleepq_wait_sig "void *wchan" "int pri" .Sh DESCRIPTION Sleep queues provide a mechanism for suspending execution of a thread until some condition is met. Each queue is associated with a specific wait channel when it is active, and only one queue may be associated with a wait channel at any given point in time. The implementation of each wait channel splits its sleepqueue into 2 sub-queues in order to enable some optimizations on threads' wakeups. An active queue holds a list of threads that are blocked on the associated wait channel. Threads that are not blocked on a wait channel have an associated inactive sleep queue. When a thread blocks on a wait channel it donates its inactive sleep queue to the wait channel. 
When a thread is resumed, the wait channel that it was blocked on gives it an inactive sleep queue for later use. .Pp The .Fn sleepq_alloc function allocates an inactive sleep queue and is used to assign a sleep queue to a thread during thread creation. The .Fn sleepq_free function frees the resources associated with an inactive sleep queue and is used to free a queue during thread destruction. .Pp Active sleep queues are stored in a hash table hashed on the addresses pointed to by wait channels. Each bucket in the hash table contains a sleep queue chain. A sleep queue chain contains a spin mutex and a list of sleep queues that hash to that specific chain. Active sleep queues are protected by their chain's spin mutex. The .Fn init_sleepqueues function initializes the hash table of sleep queue chains. .Pp The .Fn sleepq_lock function locks the sleep queue chain associated with wait channel .Fa wchan . .Pp The .Fn sleepq_lookup function returns a pointer to the currently active sleep queue associated with .Fa wchan , or .Dv NULL if there is no active sleep queue associated with .Fa wchan . It requires the sleep queue chain associated with .Fa wchan to have been locked by a prior call to .Fn sleepq_lock . .Pp The .Fn sleepq_release function unlocks the sleep queue chain associated with .Fa wchan and is primarily useful when aborting a pending sleep request before one of the wait functions is called. .Pp The .Fn sleepq_add function places the current thread on the sleep queue associated with the wait channel .Fa wchan . The sleep queue chain associated with argument .Fa wchan must be locked by a prior call to .Fn sleepq_lock when this function is called. If a lock is specified via the .Fa lock argument, and if the kernel was compiled with .Cd "options INVARIANTS" , then the sleep queue code will perform extra checks to ensure that the lock is used by all threads sleeping on .Fa wchan . The .Fa wmesg parameter should be a short description of .Fa wchan . The .Fa flags parameter is a bitmask consisting of the type of sleep queue being slept on and zero or more optional flags. The .Fa queue parameter specifies the sub-queue in which the contending thread will be inserted. .Pp There are currently three types of sleep queues: .Pp .Bl -tag -width ".Dv SLEEPQ_CONDVAR" -compact .It Dv SLEEPQ_CONDVAR A sleep queue used to implement condition variables. .It Dv SLEEPQ_SLEEP A sleep queue used to implement .Xr sleep 9 , .Xr wakeup 9 , and .Xr wakeup_one 9 . .It Dv SLEEPQ_PAUSE A sleep queue used to implement .Xr pause 9 . .El .Pp There are currently two optional flags: .Pp .Bl -tag -width ".Dv SLEEPQ_STOP_ON_BDRY" -compact .It Dv SLEEPQ_INTERRUPTIBLE The current thread is entering an interruptible sleep. .It Dv SLEEPQ_STOP_ON_BDRY When a thread is entering an interruptible sleep, do not stop it upon the arrival of a stop action, such as .Dv SIGSTOP . Wake it up instead. .El .Pp A timeout on the sleep may be specified by calling .Fn sleepq_set_timeout after .Fn sleepq_add . The .Fa wchan parameter should be the same value from the preceding call to .Fn sleepq_add , and the sleep queue chain associated with .Fa wchan must have been locked by a prior call to .Fn sleepq_lock . The .Fa timo parameter should specify the timeout value in ticks. .Pp The .Fn sleepq_set_timeout_sbt function takes an .Fa sbt argument instead of .Fa timo , which allows a relative or absolute wakeup time to be specified with higher resolution, in the form of .Vt sbintime_t .
The .Fa pr parameter specifies the desired absolute event precision. The .Fa flags parameter allows additional .Fn callout_reset_sbt flags to be passed. .Pp Once the thread is ready to suspend, one of the wait functions is called to put the current thread to sleep until it is awakened and to context switch to another thread. The .Fn sleepq_wait function is used for non-interruptible sleeps that do not have a timeout. The .Fn sleepq_timedwait function is used for non-interruptible sleeps that have had a timeout set via .Fn sleepq_set_timeout . The .Fn sleepq_wait_sig function is used for interruptible sleeps that do not have a timeout. The .Fn sleepq_timedwait_sig function is used for interruptible sleeps that do have a timeout set. The .Fa wchan argument to all of the wait functions is the wait channel being slept on. The sleep queue chain associated with argument .Fa wchan needs to have been locked with a prior call to .Fn sleepq_lock . The .Fa pri argument is used to set the priority of the thread when it is awakened. If it is set to zero, the thread's priority is left alone. .Pp When the thread is resumed, the wait functions return a non-zero value if the thread was awakened due to an interrupt other than a signal or a timeout. If the sleep timed out, then .Er EWOULDBLOCK is returned. If the sleep was interrupted by something other than a signal, then some other return value will be returned. .Pp A sleeping thread is normally resumed by the .Fn sleepq_broadcast and .Fn sleepq_signal functions. The .Fn sleepq_signal function awakens the highest priority thread sleeping on a wait channel, while .Fn sleepq_broadcast awakens all of the threads sleeping on a wait channel. The .Fa wchan argument specifies which wait channel to awaken. The .Fa flags argument must match the sleep queue type contained in the .Fa flags argument passed to .Fn sleepq_add by the threads sleeping on the wait channel. If the .Fa pri argument does not equal \-1, then each thread that is awakened will have its priority raised to .Fa pri if it has a lower priority. The sleep queue chain associated with argument .Fa wchan must be locked by a prior call to .Fn sleepq_lock before calling any of these functions. The .Fa queue argument specifies the sub-queue from which threads need to be woken up. .Pp A thread in an interruptible sleep can be interrupted by another thread via the .Fn sleepq_abort function. The .Fa td argument specifies the thread to interrupt. An individual thread can also be awakened from sleeping on a specific wait channel via the .Fn sleepq_remove function. The .Fa td argument specifies the thread to awaken and the .Fa wchan argument specifies the wait channel to awaken it from. If the thread .Fa td is not blocked on the wait channel .Fa wchan , then this function will not do anything, even if the thread is asleep on a different wait channel. This function should only be used if one of the other functions above is not sufficient. One possible use is waking up a specific thread from a widely shared sleep channel. .Pp The .Fn sleepq_sleepcnt function offers a simple way to retrieve the number of threads sleeping on the specified .Fa queue , given a .Fa wchan . .Pp The .Fn sleepq_type function returns the type of the sleep queue associated with .Fa wchan . .Pp The .Fn sleepq_abort , .Fn sleepq_broadcast , and .Fn sleepq_signal functions all return a boolean value. If the return value is true, then at least one thread was resumed that is currently swapped out.
The caller is responsible for awakening the scheduler process so that the resumed thread will be swapped back in. This is done by calling the .Fn kick_proc0 function after releasing the sleep queue chain lock via a call to .Fn sleepq_release . .Pp The sleep queue interface is currently used to implement the .Xr sleep 9 and .Xr condvar 9 interfaces. Almost all other code in the kernel should use one of those interfaces rather than manipulating sleep queues directly. .Sh SEE ALSO .Xr condvar 9 , .Xr runqueue 9 , .Xr scheduler 9 , .Xr sleep 9 , .Xr timeout 9 Index: head/share/man/man9/swi.9 =================================================================== --- head/share/man/man9/swi.9 (revision 344854) +++ head/share/man/man9/swi.9 (revision 344855) @@ -1,250 +1,249 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd April 19, 2012 .Dt SWI 9 .Os .Sh NAME .Nm swi_add , .Nm swi_remove , .Nm swi_sched .Nd register and schedule software interrupt handlers .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .In sys/interrupt.h .Vt "extern struct intr_event *tty_intr_event" ; .Vt "extern struct intr_event *clk_intr_event" ; .Vt "extern void *vm_ih" ; .Ft int .Fo swi_add .Fa "struct intr_event **eventp" .Fa "const char *name" .Fa "driver_intr_t handler" .Fa "void *arg" .Fa "int pri" .Fa "enum intr_type flags" .Fa "void **cookiep" .Fc .Ft int .Fn swi_remove "void *cookie" .Ft void .Fn swi_sched "void *cookie" "int flags" .Sh DESCRIPTION These functions are used to register and schedule software interrupt handlers. Software interrupt handlers are attached to a software interrupt thread, just as hardware interrupt handlers are attached to a hardware interrupt thread. Multiple handlers can be attached to the same thread. Software interrupt handlers can be used to queue up less critical processing inside of hardware interrupt handlers so that the work can be done at a later time. Software interrupt threads are different from other kernel threads in that they are treated as an interrupt thread. This means that time spent executing these threads is counted as interrupt time, and that they can be run via a lightweight context switch. 
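.Pp
Before the per-argument details below, a minimal registration-and-scheduling sketch; the handler name, the
.Dv SWI_TQ
priority, and the choice of a
.Dv NULL
event pointer and handler argument are illustrative assumptions:
.Bd -literal
static void *foo_ih;	/* handler cookie filled in by swi_add() */

static void
foo_swi(void *arg)
{
	/* Deferred, less critical work runs here in the swi thread. */
}

/* During attach: create a swi thread and register the handler on it. */
int error = swi_add(NULL, "foo", foo_swi, NULL, SWI_TQ, INTR_MPSAFE,
    &foo_ih);

/* Later, typically from a hardware interrupt handler: queue the work. */
swi_sched(foo_ih, 0);
.Ed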
.Pp The .Fn swi_add function is used to add a new software interrupt handler to a specified interrupt event. The .Fa eventp argument is an optional pointer to a .Vt struct intr_event pointer. If this argument points to an existing event that holds a list of interrupt handlers, then this handler will be attached to that event. Otherwise, a new event will be created, and if .Fa eventp is not .Dv NULL , then the pointer at that address will be modified to point to the newly created event. The .Fa name argument is used to associate a name with a specific handler. This name is appended to the name of the software interrupt thread that this handler is attached to. The .Fa handler argument is the function that will be executed when the handler is scheduled to run. The .Fa arg parameter will be passed in as the only parameter to .Fa handler when the function is executed. The .Fa pri value specifies the priority of this interrupt handler relative to other software interrupt handlers. If an interrupt event is created, then this value is used as the vector, and the .Fa flags argument is used to specify the attributes of a handler such as .Dv INTR_MPSAFE . The .Fa cookiep argument points to a .Vt void * cookie. This cookie will be set to a value that uniquely identifies this handler, and is used to schedule the handler for execution later on. .Pp The .Fn swi_remove function is used to tear down an interrupt handler pointed to by the .Fa cookie argument. It detaches the interrupt handler from the associated interrupt event and frees its memory. .Pp The .Fn swi_sched function is used to schedule an interrupt handler and its associated thread to run. The .Fa cookie argument specifies which software interrupt handler should be scheduled to run. The .Fa flags argument specifies how and when the handler should be run and is a mask of one or more of the following flags: .Bl -tag -width SWI_DELAY .It Dv SWI_DELAY Specifies that the kernel should mark the specified handler as needing to run, but the kernel should not schedule the software interrupt thread to run. Instead, .Fa handler will be executed the next time that the software interrupt thread runs after being scheduled by another event. Attaching a handler to the clock software interrupt thread and using this flag when scheduling a software interrupt handler can be used to implement the functionality performed by .Fn setdelayed in earlier versions of .Fx . .El .Pp The .Va tty_intr_event and .Va clk_intr_event variables contain pointers to the software interrupt handlers for the tty and clock software interrupts, respectively. .Va tty_intr_event is used to hang tty software interrupt handlers off of the same thread. .Va clk_intr_event is used to hang delayed handlers off of the clock software interrupt thread so that the functionality of .Fn setdelayed can be obtained in conjunction with .Dv SWI_DELAY . The .Va vm_ih handler cookie is used to schedule software interrupt threads to run for the VM subsystem. .Sh RETURN VALUES The .Fn swi_add and .Fn swi_remove functions return zero on success and non-zero on failure. .Sh ERRORS The .Fn swi_add function will fail if: .Bl -tag -width Er .It Bq Er EAGAIN The system-imposed limit on the total number of processes under execution would be exceeded. The limit is given by the .Xr sysctl 3 MIB variable .Dv KERN_MAXPROC . .It Bq Er EINVAL The .Fa flags argument specifies .Dv INTR_ENTROPY . .It Bq Er EINVAL The .Fa eventp argument points to a hardware interrupt thread.
.It Bq Er EINVAL Either of the .Fa name or .Fa handler arguments are .Dv NULL . .It Bq Er EINVAL The .Dv INTR_EXCL flag is specified and the interrupt event pointed to by .Fa eventp already has at least one handler, or the interrupt event already has an exclusive handler. .El .Pp The .Fn swi_remove function will fail if: .Bl -tag -width Er .It Bq Er EINVAL A software interrupt handler pointed to by .Fa cookie is .Dv NULL . .El .Sh SEE ALSO .Xr ithread 9 , .Xr taskqueue 9 .Sh HISTORY The .Fn swi_add and .Fn swi_sched functions first appeared in .Fx 5.0 . They replaced the .Fn register_swi function which appeared in .Fx 3.0 and the .Fn setsoft* , and .Fn schedsoft* functions which date back to at least .Bx 4.4 . The .Fn swi_remove function first appeared in .Fx 6.1 . .Sh BUGS Most of the global variables described in this manual page should not be global, or at the very least should not be declared in .In sys/interrupt.h . Index: head/stand/efi/libefi/devpath.c =================================================================== --- head/stand/efi/libefi/devpath.c (revision 344854) +++ head/stand/efi/libefi/devpath.c (revision 344855) @@ -1,231 +1,230 @@ /*- * Copyright (c) 2016 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include static EFI_GUID ImageDevicePathGUID = EFI_LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID; static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL; static EFI_GUID DevicePathToTextGUID = EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID; static EFI_DEVICE_PATH_TO_TEXT_PROTOCOL *textProtocol; EFI_DEVICE_PATH * efi_lookup_image_devpath(EFI_HANDLE handle) { EFI_DEVICE_PATH *devpath; EFI_STATUS status; status = BS->HandleProtocol(handle, &ImageDevicePathGUID, (VOID **)&devpath); if (EFI_ERROR(status)) devpath = NULL; return (devpath); } EFI_DEVICE_PATH * efi_lookup_devpath(EFI_HANDLE handle) { EFI_DEVICE_PATH *devpath; EFI_STATUS status; status = BS->HandleProtocol(handle, &DevicePathGUID, (VOID **)&devpath); if (EFI_ERROR(status)) devpath = NULL; return (devpath); } CHAR16 * efi_devpath_name(EFI_DEVICE_PATH *devpath) { static int once = 1; EFI_STATUS status; if (devpath == NULL) return (NULL); if (once) { status = BS->LocateProtocol(&DevicePathToTextGUID, NULL, (VOID **)&textProtocol); if (EFI_ERROR(status)) textProtocol = NULL; once = 0; } if (textProtocol == NULL) return (NULL); return (textProtocol->ConvertDevicePathToText(devpath, TRUE, TRUE)); } void efi_free_devpath_name(CHAR16 *text) { BS->FreePool(text); } EFI_DEVICE_PATH * efi_devpath_last_node(EFI_DEVICE_PATH *devpath) { if (IsDevicePathEnd(devpath)) return (NULL); while (!IsDevicePathEnd(NextDevicePathNode(devpath))) devpath = NextDevicePathNode(devpath); return (devpath); } EFI_DEVICE_PATH * efi_devpath_trim(EFI_DEVICE_PATH *devpath) { EFI_DEVICE_PATH *node, *copy; size_t prefix, len; if ((node = efi_devpath_last_node(devpath)) == NULL) return (NULL); prefix = (UINT8 *)node - (UINT8 *)devpath; if (prefix == 0) return (NULL); len = prefix + DevicePathNodeLength(NextDevicePathNode(node)); copy = malloc(len); if (copy != NULL) { memcpy(copy, devpath, prefix); node = (EFI_DEVICE_PATH *)((UINT8 *)copy + prefix); SetDevicePathEndNode(node); } return (copy); } EFI_HANDLE efi_devpath_handle(EFI_DEVICE_PATH *devpath) { EFI_STATUS status; EFI_HANDLE h; /* * There isn't a standard way to locate a handle for a given * device path. However, querying the EFI_DEVICE_PATH protocol * for a given device path should give us a handle for the * closest node in the path to the end that is valid. 
*/ status = BS->LocateDevicePath(&DevicePathGUID, &devpath, &h); if (EFI_ERROR(status)) return (NULL); return (h); } bool efi_devpath_match_node(EFI_DEVICE_PATH *devpath1, EFI_DEVICE_PATH *devpath2) { size_t len; if (devpath1 == NULL || devpath2 == NULL) return (false); if (DevicePathType(devpath1) != DevicePathType(devpath2) || DevicePathSubType(devpath1) != DevicePathSubType(devpath2)) return (false); len = DevicePathNodeLength(devpath1); if (len != DevicePathNodeLength(devpath2)) return (false); if (memcmp(devpath1, devpath2, len) != 0) return (false); return (true); } bool efi_devpath_match(EFI_DEVICE_PATH *devpath1, EFI_DEVICE_PATH *devpath2) { if (devpath1 == NULL || devpath2 == NULL) return (false); while (true) { if (!efi_devpath_match_node(devpath1, devpath2)) return false; if (IsDevicePathEnd(devpath1)) break; devpath1 = NextDevicePathNode(devpath1); devpath2 = NextDevicePathNode(devpath2); } return (true); } bool efi_devpath_is_prefix(EFI_DEVICE_PATH *prefix, EFI_DEVICE_PATH *path) { size_t len; if (prefix == NULL || path == NULL) return (false); while (1) { if (IsDevicePathEnd(prefix)) break; if (DevicePathType(prefix) != DevicePathType(path) || DevicePathSubType(prefix) != DevicePathSubType(path)) return (false); len = DevicePathNodeLength(prefix); if (len != DevicePathNodeLength(path)) return (false); if (memcmp(prefix, path, len) != 0) return (false); prefix = NextDevicePathNode(prefix); path = NextDevicePathNode(path); } return (true); } /* * Skip over the 'prefix' part of path and return the part of the path * that starts with the first node that's a MEDIA_DEVICE_PATH. */ EFI_DEVICE_PATH * efi_devpath_to_media_path(EFI_DEVICE_PATH *path) { while (!IsDevicePathEnd(path)) { if (DevicePathType(path) == MEDIA_DEVICE_PATH) return (path); path = NextDevicePathNode(path); } return (NULL); } UINTN efi_devpath_length(EFI_DEVICE_PATH *path) { EFI_DEVICE_PATH *start = path; while (!IsDevicePathEnd(path)) path = NextDevicePathNode(path); return ((UINTN)path - (UINTN)start) + DevicePathNodeLength(path); } Index: head/stand/i386/cdboot/cdboot.S =================================================================== --- head/stand/i386/cdboot/cdboot.S (revision 344854) +++ head/stand/i386/cdboot/cdboot.S (revision 344855) @@ -1,594 +1,593 @@ # # Copyright (c) 2001 John Baldwin -# All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # # This program is a freestanding boot program to load an a.out binary # from a CD-ROM booted with no emulation mode as described by the El # Torito standard. Due to broken BIOSen that do not load the desired # number of sectors, we try to fit this in as small a space as possible. # # Basically, we first create a set of boot arguments to pass to the loaded # binary. Then we attempt to load /boot/loader from the CD we were booted # off of. # #include # # Memory locations. # .set MEM_PAGE_SIZE,0x1000 # memory page size, 4k .set MEM_ARG,0x900 # Arguments at start .set MEM_ARG_BTX,0xa100 # Where we move them to so the # BTX client can see them .set MEM_ARG_SIZE,0x18 # Size of the arguments .set MEM_BTX_ADDRESS,0x9000 # where BTX lives .set MEM_BTX_ENTRY,0x9010 # where BTX starts to execute .set MEM_BTX_OFFSET,MEM_PAGE_SIZE # offset of BTX in the loader .set MEM_BTX_CLIENT,0xa000 # where BTX clients live # # a.out header fields # .set AOUT_TEXT,0x04 # text segment size .set AOUT_DATA,0x08 # data segment size .set AOUT_BSS,0x0c # zero'd BSS size .set AOUT_SYMBOLS,0x10 # symbol table .set AOUT_ENTRY,0x14 # entry point .set AOUT_HEADER,MEM_PAGE_SIZE # size of the a.out header # # Segment selectors. # .set SEL_SDATA,0x8 # Supervisor data .set SEL_RDATA,0x10 # Real mode data .set SEL_SCODE,0x18 # PM-32 code .set SEL_SCODE16,0x20 # PM-16 code # # BTX constants # .set INT_SYS,0x30 # BTX syscall interrupt # # Constants for reading from the CD. # .set ERROR_TIMEOUT,0x80 # BIOS timeout on read .set NUM_RETRIES,3 # Num times to retry .set SECTOR_SIZE,0x800 # size of a sector .set SECTOR_SHIFT,11 # number of place to shift .set BUFFER_LEN,0x100 # number of sectors in buffer .set MAX_READ,0x10000 # max we can read at a time .set MAX_READ_SEC,MAX_READ >> SECTOR_SHIFT .set MEM_READ_BUFFER,0x9000 # buffer to read from CD .set MEM_VOLDESC,MEM_READ_BUFFER # volume descriptor .set MEM_DIR,MEM_VOLDESC+SECTOR_SIZE # Lookup buffer .set VOLDESC_LBA,0x10 # LBA of vol descriptor .set VD_PRIMARY,1 # Primary VD .set VD_END,255 # VD Terminator .set VD_ROOTDIR,156 # Offset of Root Dir Record .set DIR_LEN,0 # Offset of Dir Record length .set DIR_EA_LEN,1 # Offset of EA length .set DIR_EXTENT,2 # Offset of 64-bit LBA .set DIR_SIZE,10 # Offset of 64-bit length .set DIR_NAMELEN,32 # Offset of 8-bit name len .set DIR_NAME,33 # Offset of dir name # # We expect to be loaded by the BIOS at 0x7c00 (standard boot loader entry # point) # .code16 .globl start .org 0x0, 0x0 # # Program start. 
# start: cld # string ops inc xor %ax,%ax # zero %ax mov %ax,%ss # setup the mov $start,%sp # stack mov %ax,%ds # setup the mov %ax,%es # data segments mov %dl,drive # Save BIOS boot device mov $msg_welcome,%si # %ds:(%si) -> welcome message call putstr # display the welcome message # # Setup the arguments that the loader is expecting from boot[12] # mov $msg_bootinfo,%si # %ds:(%si) -> boot args message call putstr # display the message mov $MEM_ARG,%bx # %ds:(%bx) -> boot args mov %bx,%di # %es:(%di) -> boot args xor %eax,%eax # zero %eax mov $(MEM_ARG_SIZE/4),%cx # Size of arguments in 32-bit # dwords rep # Clear the arguments stosl # to zero mov drive,%dl # Store BIOS boot device mov %dl,0x4(%bx) # in kargs->bootdev orb $KARGS_FLAGS_CD,0x8(%bx) # kargs->bootflags |= # KARGS_FLAGS_CD # # Load Volume Descriptor # mov $VOLDESC_LBA,%eax # Set LBA of first VD load_vd: push %eax # Save %eax mov $1,%dh # One sector mov $MEM_VOLDESC,%ebx # Destination call read # Read it in cmpb $VD_PRIMARY,(%bx) # Primary VD? je have_vd # Yes pop %eax # Prepare to inc %eax # try next cmpb $VD_END,(%bx) # Last VD? jne load_vd # No, read next mov $msg_novd,%si # No VD jmp error # Halt have_vd: # Have Primary VD # # Try to look up the loader binary using the paths in the loader_paths # array. # mov $loader_paths,%si # Point to start of array lookup_path: push %si # Save file name pointer call lookup # Try to find file pop %di # Restore file name pointer jnc lookup_found # Found this file xor %al,%al # Look for next mov $0xffff,%cx # path name by repnz # scanning for scasb # nul char mov %di,%si # Point %si at next path mov (%si),%al # Get first char of next path or %al,%al # Is it double nul? jnz lookup_path # No, try it. mov $msg_failed,%si # Failed message jmp error # Halt lookup_found: # Found a loader file # # Load the binary into the buffer. Due to real mode addressing limitations # we have to read it in 64k chunks. # mov DIR_SIZE(%bx),%eax # Read file length add $SECTOR_SIZE-1,%eax # Convert length to sectors shr $SECTOR_SHIFT,%eax cmp $BUFFER_LEN,%eax jbe load_sizeok mov $msg_load2big,%si # Error message call error load_sizeok: movzbw %al,%cx # Num sectors to read mov DIR_EXTENT(%bx),%eax # Load extent xor %edx,%edx mov DIR_EA_LEN(%bx),%dl add %edx,%eax # Skip extended mov $MEM_READ_BUFFER,%ebx # Read into the buffer load_loop: mov %cl,%dh cmp $MAX_READ_SEC,%cl # Truncate to max read size jbe load_notrunc mov $MAX_READ_SEC,%dh load_notrunc: sub %dh,%cl # Update count push %eax # Save call read # Read it in pop %eax # Restore add $MAX_READ_SEC,%eax # Update LBA add $MAX_READ,%ebx # Update dest addr jcxz load_done # Done? 
jmp load_loop # Keep going load_done: # # Turn on the A20 address line # call seta20 # Turn A20 on # # Relocate the loader and BTX using a very lazy protected mode # mov $msg_relocate,%si # Display the call putstr # relocation message mov MEM_READ_BUFFER+AOUT_ENTRY,%edi # %edi is the destination mov $(MEM_READ_BUFFER+AOUT_HEADER),%esi # %esi is # the start of the text # segment mov MEM_READ_BUFFER+AOUT_TEXT,%ecx # %ecx = length of the text # segment push %edi # Save entry point for later lgdt gdtdesc # setup our own gdt cli # turn off interrupts mov %cr0,%eax # Turn on or $0x1,%al # protected mov %eax,%cr0 # mode ljmp $SEL_SCODE,$pm_start # long jump to clear the # instruction pre-fetch queue .code32 pm_start: mov $SEL_SDATA,%ax # Initialize mov %ax,%ds # %ds and mov %ax,%es # %es to a flat selector rep # Relocate the movsb # text segment add $(MEM_PAGE_SIZE - 1),%edi # pad %edi out to a new page and $~(MEM_PAGE_SIZE - 1),%edi # for the data segment mov MEM_READ_BUFFER+AOUT_DATA,%ecx # size of the data segment rep # Relocate the movsb # data segment mov MEM_READ_BUFFER+AOUT_BSS,%ecx # size of the bss xor %eax,%eax # zero %eax add $3,%cl # round %ecx up to shr $2,%ecx # a multiple of 4 rep # zero the stosl # bss mov MEM_READ_BUFFER+AOUT_ENTRY,%esi # %esi -> relocated loader add $MEM_BTX_OFFSET,%esi # %esi -> BTX in the loader mov $MEM_BTX_ADDRESS,%edi # %edi -> where BTX needs to go movzwl 0xa(%esi),%ecx # %ecx -> length of BTX rep # Relocate movsb # BTX ljmp $SEL_SCODE16,$pm_16 # Jump to 16-bit PM .code16 pm_16: mov $SEL_RDATA,%ax # Initialize mov %ax,%ds # %ds and mov %ax,%es # %es to a real mode selector mov %cr0,%eax # Turn off and $~0x1,%al # protected mov %eax,%cr0 # mode ljmp $0,$pm_end # Long jump to clear the # instruction pre-fetch queue pm_end: sti # Turn interrupts back on now # # Copy the BTX client to MEM_BTX_CLIENT # xor %ax,%ax # zero %ax and set mov %ax,%ds # %ds and %es mov %ax,%es # to segment 0 mov $MEM_BTX_CLIENT,%di # Prepare to relocate mov $btx_client,%si # the simple btx client mov $(btx_client_end-btx_client),%cx # length of btx client rep # Relocate the movsb # simple BTX client # # Copy the boot[12] args to where the BTX client can see them # mov $MEM_ARG,%si # where the args are at now mov $MEM_ARG_BTX,%di # where the args are moving to mov $(MEM_ARG_SIZE/4),%cx # size of the arguments in longs rep # Relocate movsl # the words # # Save the entry point so the client can get to it later on # pop %eax # Restore saved entry point stosl # and add it to the end of # the arguments # # Now we just start up BTX and let it do the rest # mov $msg_jump,%si # Display the call putstr # jump message ljmp $0,$MEM_BTX_ENTRY # Jump to the BTX entry point # # Lookup the file in the path at [SI] from the root directory. # # Trashes: All but BX # Returns: CF = 0 (success), BX = pointer to record # CF = 1 (not found) # lookup: mov $VD_ROOTDIR+MEM_VOLDESC,%bx # Root directory record push %si mov $msg_lookup,%si # Display lookup message call putstr pop %si push %si call putstr mov $msg_lookup2,%si call putstr pop %si lookup_dir: lodsb # Get first char of path cmp $0,%al # Are we done? je lookup_done # Yes cmp $'/',%al # Skip path separator. 
		je lookup_dir
		dec %si				# Undo lodsb side effect
		call find_file			# Lookup first path item
		jnc lookup_dir			# Try next component
		mov $msg_lookupfail,%si		# Not found message
		call putstr
		stc				# Set carry
		ret
lookup_done:	mov $msg_lookupok,%si		# Success message
		call putstr
		clc				# Clear carry
		ret
#
# Lookup file at [SI] in directory whose record is at [BX].
#
# Trashes: All but returns
# Returns: CF = 0 (success), BX = pointer to record, SI = next path item
#          CF = 1 (not found), SI = preserved
#
find_file:	mov DIR_EXTENT(%bx),%eax	# Load extent
		xor %edx,%edx
		mov DIR_EA_LEN(%bx),%dl
		add %edx,%eax			# Skip extended attributes
		mov %eax,rec_lba		# Save LBA
		mov DIR_SIZE(%bx),%eax		# Save size
		mov %eax,rec_size
		xor %cl,%cl			# Zero length
		push %si			# Save
ff.namelen:	inc %cl				# Update length
		lodsb				# Read char
		cmp $0,%al			# Nul?
		je ff.namedone			# Yes
		cmp $'/',%al			# Path separator?
		jnz ff.namelen			# No, keep going
ff.namedone:	dec %cl				# Adjust length and save
		mov %cl,name_len
		pop %si				# Restore
ff.load:	mov rec_lba,%eax		# Load LBA
		mov $MEM_DIR,%ebx		# Address buffer
		mov $1,%dh			# One sector
		call read			# Read directory block
		incl rec_lba			# Update LBA to next block
ff.scan:	mov %ebx,%edx			# Check for EOF
		sub $MEM_DIR,%edx
		cmp %edx,rec_size
		ja ff.scan.1
		stc				# EOF reached
		ret
ff.scan.1:	cmpb $0,DIR_LEN(%bx)		# Last record in block?
		je ff.nextblock
		push %si			# Save
		movzbw DIR_NAMELEN(%bx),%si	# Find end of string
ff.checkver:	cmpb $'0',DIR_NAME-1(%bx,%si)	# Less than '0'?
		jb ff.checkver.1
		cmpb $'9',DIR_NAME-1(%bx,%si)	# Greater than '9'?
		ja ff.checkver.1
		dec %si				# Next char
		jnz ff.checkver
		jmp ff.checklen			# All numbers in name, so
						#  no version
ff.checkver.1:	movzbw DIR_NAMELEN(%bx),%cx
		cmp %cx,%si			# Did we find any digits?
		je ff.checkdot			# No
		cmpb $';',DIR_NAME-1(%bx,%si)	# Check for semicolon
		jne ff.checkver.2
		dec %si				# Skip semicolon
		mov %si,%cx
		mov %cl,DIR_NAMELEN(%bx)	# Adjust length
		jmp ff.checkdot
ff.checkver.2:	mov %cx,%si			# Restore %si to end of string
ff.checkdot:	cmpb $'.',DIR_NAME-1(%bx,%si)	# Trailing dot?
		jne ff.checklen			# No
		decb DIR_NAMELEN(%bx)		# Adjust length
ff.checklen:	pop %si				# Restore
		movzbw name_len,%cx		# Load length of name
		cmp %cl,DIR_NAMELEN(%bx)	# Does length match?
		je ff.checkname			# Yes, check name
ff.nextrec:	add DIR_LEN(%bx),%bl		# Next record
		adc $0,%bh
		jmp ff.scan
ff.nextblock:	subl $SECTOR_SIZE,rec_size	# Adjust size
		jnc ff.load			# If subtract ok, keep going
		ret				# End of file, so not found
ff.checkname:	lea DIR_NAME(%bx),%di		# Address name in record
		push %si			# Save
		repe cmpsb			# Compare name
		je ff.match			# We have a winner!
		pop %si				# Restore
		jmp ff.nextrec			# Keep looking.
ff.match:	add $2,%sp			# Discard saved %si
		clc				# Clear carry
		ret
#
# Load DH sectors starting at LBA EAX into [EBX].
#
# Trashes: EAX
#
read:		push %si			# Save
		push %cx			# Save since some BIOSs trash
		mov %eax,edd_lba		# LBA to read from
		mov %ebx,%eax			# Convert address
		shr $4,%eax			#  to segment
		mov %ax,edd_addr+0x2		#  and store
read.retry:	call twiddle			# Entertain the user
		push %dx			# Save
		mov $edd_packet,%si		# Address Packet
		mov %dh,edd_len			# Set length
		mov drive,%dl			# BIOS Device
		mov $0x42,%ah			# BIOS: Extended Read
		int $0x13			# Call BIOS
		pop %dx				# Restore
		jc read.fail			# Worked?
		pop %cx				# Restore
		pop %si
		ret				# Return
read.fail:	cmp $ERROR_TIMEOUT,%ah		# Timeout?
		je read.retry			# Yes, Retry.
read.error:	mov %ah,%al			# Save error
		mov $hex_error,%di		# Format it
		call hex8			#  as hex
		mov $msg_badread,%si		# Display Read error message
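The version and trailing-dot stripping in ff.checkver and ff.checkdot above is dense. The same normalization expressed in C, as a sketch only (the function name and layout are mine, not loader code): strip a ";version" suffix made of digits, then a trailing dot from an empty extension, before comparing names.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static size_t
iso9660_name_len(const char *name, size_t len)
{
	size_t i = len;

	/* Walk back over trailing digits. */
	while (i > 0 && isdigit((unsigned char)name[i - 1]))
		i--;
	/* Only strip the digits if they form a ";version" suffix. */
	if (i > 0 && i < len && name[i - 1] == ';')
		len = i - 1;
	/* A trailing dot means an empty extension; drop it too. */
	if (len > 0 && name[len - 1] == '.')
		len--;
	return (len);
}

int
main(void)
{
	const char *n = "LOADER.;1";

	/* Prints "LOADER": the ";1" version and the bare dot are stripped. */
	printf("%.*s\n", (int)iso9660_name_len(n, strlen(n)), n);
	return (0);
}

#
# Display error message at [SI] and halt.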
# error: call putstr # Display message halt: hlt jmp halt # Spin # # Display a null-terminated string. # # Trashes: AX, SI # putstr: push %bx # Save putstr.load: lodsb # load %al from %ds:(%si) test %al,%al # stop at null jnz putstr.putc # if the char != null, output it pop %bx # Restore ret # return when null is hit putstr.putc: call putc # output char jmp putstr.load # next char # # Display a single char. # putc: mov $0x7,%bx # attribute for output mov $0xe,%ah # BIOS: put_char int $0x10 # call BIOS, print char in %al ret # Return to caller # # Output the "twiddle" # twiddle: push %ax # Save push %bx # Save mov twiddle_index,%al # Load index mov $twiddle_chars,%bx # Address table inc %al # Next and $3,%al # char mov %al,twiddle_index # Save index for next call xlat # Get char call putc # Output it mov $8,%al # Backspace call putc # Output it pop %bx # Restore pop %ax # Restore ret # # Enable A20. Put an upper limit on the amount of time we wait for the # keyboard controller to get ready (65K x ISA access time). If # we wait more than that amount, the hardware is probably # legacy-free and simply doesn't have a keyboard controller. # Thus, the A20 line is already enabled. # seta20: cli # Disable interrupts xor %cx,%cx # Clear seta20.1: inc %cx # Increment, overflow? jz seta20.3 # Yes in $0x64,%al # Get status test $0x2,%al # Busy? jnz seta20.1 # Yes mov $0xd1,%al # Command: Write out %al,$0x64 # output port seta20.2: in $0x64,%al # Get status test $0x2,%al # Busy? jnz seta20.2 # Yes mov $0xdf,%al # Enable out %al,$0x60 # A20 seta20.3: sti # Enable interrupts ret # To caller # # Convert AL to hex, saving the result to [EDI]. # hex8: pushl %eax # Save shrb $0x4,%al # Do upper call hex8.1 # 4 popl %eax # Restore hex8.1: andb $0xf,%al # Get lower 4 cmpb $0xa,%al # Convert sbbb $0x69,%al # to hex das # digit orb $0x20,%al # To lower case stosb # Save char ret # (Recursive) # # BTX client to start btxldr # .code32 btx_client: mov $(MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE-4), %esi # %ds:(%esi) -> end # of boot[12] args mov $(MEM_ARG_SIZE/4),%ecx # Number of words to push std # Go backwards push_arg: lodsl # Read argument push %eax # Push it onto the stack loop push_arg # Push all of the arguments cld # In case anyone depends on this pushl MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE # Entry point of # the loader push %eax # Emulate a near call mov $0x1,%eax # 'exec' system call int $INT_SYS # BTX system call btx_client_end: .code16 .p2align 4 # # Global descriptor table. # gdt: .word 0x0,0x0,0x0,0x0 # Null entry .word 0xffff,0x0,0x9200,0xcf # SEL_SDATA .word 0xffff,0x0,0x9200,0x0 # SEL_RDATA .word 0xffff,0x0,0x9a00,0xcf # SEL_SCODE (32-bit) .word 0xffff,0x0,0x9a00,0x8f # SEL_SCODE16 (16-bit) gdt.1: # # Pseudo-descriptors. 
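Stepping back to the gdt table just above: each descriptor packs base, limit, and flag bits into four 16-bit words. A standalone C sketch decoding the SEL_SCODE entry (0xffff,0x0,0x9a00,0xcf), illustrative only and not loader code, shows why that entry means a flat 4G, ring-0, 32-bit code segment:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t w[4] = { 0xffff, 0x0000, 0x9a00, 0x00cf }; /* SEL_SCODE */
	uint64_t d = (uint64_t)w[0] | (uint64_t)w[1] << 16 |
	    (uint64_t)w[2] << 32 | (uint64_t)w[3] << 48;

	/* Limit: bits 0-15 plus bits 48-51; 0xfffff pages = 4G with G=1. */
	uint32_t limit = (d & 0xffff) | ((d >> 32) & 0xf0000);
	/* Base: bits 16-39 plus bits 56-63. */
	uint32_t base = ((d >> 16) & 0xffffff) | (uint32_t)((d >> 56) << 24);
	unsigned type = (d >> 40) & 0xf;	/* 0xa = execute/read code */
	unsigned gran = (d >> 55) & 1;		/* 1 = 4K granularity */
	unsigned db = (d >> 54) & 1;		/* 1 = 32-bit default size */

	printf("base=0x%x limit=0x%x type=0x%x G=%u D=%u\n",
	    (unsigned)base, (unsigned)limit, type, gran, db);
	return (0);
}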
# gdtdesc: .word gdt.1-gdt-1 # Limit .long gdt # Base # # EDD Packet # edd_packet: .byte 0x10 # Length .byte 0 # Reserved edd_len: .byte 0x0 # Num to read .byte 0 # Reserved edd_addr: .word 0x0,0x0 # Seg:Off edd_lba: .quad 0x0 # LBA drive: .byte 0 # # State for searching dir # rec_lba: .long 0x0 # LBA (adjusted for EA) rec_size: .long 0x0 # File size name_len: .byte 0x0 # Length of current name twiddle_index: .byte 0x0 msg_welcome: .asciz "CD Loader 1.2\r\n\n" msg_bootinfo: .asciz "Building the boot loader arguments\r\n" msg_relocate: .asciz "Relocating the loader and the BTX\r\n" msg_jump: .asciz "Starting the BTX loader\r\n" msg_badread: .ascii "Read Error: 0x" hex_error: .asciz "00\r\n" msg_novd: .asciz "Could not find Primary Volume Descriptor\r\n" msg_lookup: .asciz "Looking up " msg_lookup2: .asciz "... " msg_lookupok: .asciz "Found\r\n" msg_lookupfail: .asciz "File not found\r\n" msg_load2big: .asciz "File too big\r\n" msg_failed: .asciz "Boot failed\r\n" twiddle_chars: .ascii "|/-\\" loader_paths: .asciz "/BOOT/LOADER" .asciz "/boot/loader" .byte 0 Index: head/stand/i386/libi386/pxe.c =================================================================== --- head/stand/i386/libi386/pxe.c (revision 344854) +++ head/stand/i386/libi386/pxe.c (revision 344855) @@ -1,573 +1,573 @@ /*- * Copyright (c) 2000 Alfred Perlstein * Copyright (c) 2000 Paul Saab - * Copyright (c) 2000 John Baldwin * All rights reserved. + * Copyright (c) 2000 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
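 */

One note before leaving cdboot.S: the edd_packet defined above is the 16-byte disk address packet handed to INT 13h AH=42h by the read routine. Here is a C view of the same layout; the struct and field names are mine, and per the EDD convention the transfer buffer pointer is stored offset-first even though the assembly comment reads "Seg:Off" (the code stores the segment at edd_addr+0x2, consistent with this).

#include <stdint.h>
#include <stdio.h>

struct edd_packet {
	uint8_t  len;		/* packet size, always 0x10 */
	uint8_t  res1;		/* reserved */
	uint8_t  count;		/* sectors to read (edd_len) */
	uint8_t  res2;		/* reserved */
	uint16_t off;		/* transfer buffer offset */
	uint16_t seg;		/* transfer buffer segment */
	uint64_t lba;		/* starting absolute sector (edd_lba) */
} __attribute__((packed));

int
main(void)
{
	/* Must be 16 bytes, matching the leading .byte 0x10 length field. */
	printf("%zu\n", sizeof(struct edd_packet));
	return (0);
}

/*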
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libi386.h" #include "btxv86.h" #include "pxe.h" static pxenv_t *pxenv_p = NULL; /* PXENV+ */ static pxe_t *pxe_p = NULL; /* !PXE */ #ifdef PXE_DEBUG static int pxe_debug = 0; #endif void pxe_enable(void *pxeinfo); static void (*pxe_call)(int func, void *ptr); static void pxenv_call(int func, void *ptr); static void bangpxe_call(int func, void *ptr); static int pxe_init(void); static int pxe_print(int verbose); static void pxe_cleanup(void); static void pxe_perror(int error); static int pxe_netif_match(struct netif *nif, void *machdep_hint); static int pxe_netif_probe(struct netif *nif, void *machdep_hint); static void pxe_netif_init(struct iodesc *desc, void *machdep_hint); static ssize_t pxe_netif_get(struct iodesc *, void **, time_t); static ssize_t pxe_netif_put(struct iodesc *desc, void *pkt, size_t len); static void pxe_netif_end(struct netif *nif); extern struct netif_stats pxe_st[]; extern uint16_t __bangpxeseg; extern uint16_t __bangpxeoff; extern void __bangpxeentry(void); extern uint16_t __pxenvseg; extern uint16_t __pxenvoff; extern void __pxenventry(void); struct netif_dif pxe_ifs[] = { /* dif_unit dif_nsel dif_stats dif_private */ {0, 1, &pxe_st[0], 0} }; struct netif_stats pxe_st[nitems(pxe_ifs)]; struct netif_driver pxenetif = { .netif_bname = "pxenet", .netif_match = pxe_netif_match, .netif_probe = pxe_netif_probe, .netif_init = pxe_netif_init, .netif_get = pxe_netif_get, .netif_put = pxe_netif_put, .netif_end = pxe_netif_end, .netif_ifs = pxe_ifs, .netif_nifs = nitems(pxe_ifs) }; struct netif_driver *netif_drivers[] = { &pxenetif, NULL }; struct devsw pxedisk = { .dv_name = "net", .dv_type = DEVT_NET, .dv_init = pxe_init, .dv_strategy = NULL, /* Will be set in pxe_init */ .dv_open = NULL, /* Will be set in pxe_init */ .dv_close = NULL, /* Will be set in pxe_init */ .dv_ioctl = noioctl, .dv_print = pxe_print, .dv_cleanup = pxe_cleanup }; /* * This function is called by the loader to enable PXE support if we * are booted by PXE. The passed in pointer is a pointer to the PXENV+ * structure. */ void pxe_enable(void *pxeinfo) { pxenv_p = (pxenv_t *)pxeinfo; pxe_p = (pxe_t *)PTOV(pxenv_p->PXEPtr.segment * 16 + pxenv_p->PXEPtr.offset); pxe_call = NULL; } /* * return true if pxe structures are found/initialized, * also figures out our IP information via the pxe cached info struct */ static int pxe_init(void) { t_PXENV_GET_CACHED_INFO *gci_p; int counter; uint8_t checksum; uint8_t *checkptr; extern struct devsw netdev; if (pxenv_p == NULL) return (0); /* look for "PXENV+" */ if (bcmp((void *)pxenv_p->Signature, S_SIZE("PXENV+"))) { pxenv_p = NULL; return (0); } /* make sure the size is something we can handle */ if (pxenv_p->Length > sizeof(*pxenv_p)) { printf("PXENV+ structure too large, ignoring\n"); pxenv_p = NULL; return (0); } /* * do byte checksum: * add up each byte in the structure, the total should be 0 */ checksum = 0; checkptr = (uint8_t *) pxenv_p; for (counter = 0; counter < pxenv_p->Length; counter++) checksum += *checkptr++; if (checksum != 0) { printf("PXENV+ structure failed checksum, ignoring\n"); pxenv_p = NULL; return (0); } /* * PXENV+ passed, so use that if !PXE is not available or * the checksum fails. 
*/ pxe_call = pxenv_call; if (pxenv_p->Version >= 0x0200) { for (;;) { if (bcmp((void *)pxe_p->Signature, S_SIZE("!PXE"))) { pxe_p = NULL; break; } checksum = 0; checkptr = (uint8_t *)pxe_p; for (counter = 0; counter < pxe_p->StructLength; counter++) checksum += *checkptr++; if (checksum != 0) { pxe_p = NULL; break; } pxe_call = bangpxe_call; break; } } pxedisk.dv_open = netdev.dv_open; pxedisk.dv_close = netdev.dv_close; pxedisk.dv_strategy = netdev.dv_strategy; printf("\nPXE version %d.%d, real mode entry point ", (uint8_t) (pxenv_p->Version >> 8), (uint8_t) (pxenv_p->Version & 0xFF)); if (pxe_call == bangpxe_call) printf("@%04x:%04x\n", pxe_p->EntryPointSP.segment, pxe_p->EntryPointSP.offset); else printf("@%04x:%04x\n", pxenv_p->RMEntry.segment, pxenv_p->RMEntry.offset); gci_p = bio_alloc(sizeof(*gci_p)); if (gci_p == NULL) { pxe_p = NULL; return (0); } bzero(gci_p, sizeof(*gci_p)); gci_p->PacketType = PXENV_PACKET_TYPE_BINL_REPLY; pxe_call(PXENV_GET_CACHED_INFO, gci_p); if (gci_p->Status != 0) { pxe_perror(gci_p->Status); bio_free(gci_p, sizeof(*gci_p)); pxe_p = NULL; return (0); } free(bootp_response); if ((bootp_response = malloc(gci_p->BufferSize)) != NULL) { bootp_response_size = gci_p->BufferSize; bcopy(PTOV((gci_p->Buffer.segment << 4) + gci_p->Buffer.offset), bootp_response, bootp_response_size); } bio_free(gci_p, sizeof(*gci_p)); return (1); } static int pxe_print(int verbose) { if (pxe_call == NULL) return (0); printf("%s devices:", pxedisk.dv_name); if (pager_output("\n") != 0) return (1); printf(" %s0:", pxedisk.dv_name); if (verbose) { printf(" %s:%s", inet_ntoa(rootip), rootpath); } return (pager_output("\n")); } static void pxe_cleanup(void) { t_PXENV_UNLOAD_STACK *unload_stack_p; t_PXENV_UNDI_SHUTDOWN *undi_shutdown_p; if (pxe_call == NULL) return; undi_shutdown_p = bio_alloc(sizeof(*undi_shutdown_p)); if (undi_shutdown_p != NULL) { bzero(undi_shutdown_p, sizeof(*undi_shutdown_p)); pxe_call(PXENV_UNDI_SHUTDOWN, undi_shutdown_p); #ifdef PXE_DEBUG if (pxe_debug && undi_shutdown_p->Status != 0) printf("pxe_cleanup: UNDI_SHUTDOWN failed %x\n", undi_shutdown_p->Status); #endif bio_free(undi_shutdown_p, sizeof(*undi_shutdown_p)); } unload_stack_p = bio_alloc(sizeof(*unload_stack_p)); if (unload_stack_p != NULL) { bzero(unload_stack_p, sizeof(*unload_stack_p)); pxe_call(PXENV_UNLOAD_STACK, unload_stack_p); #ifdef PXE_DEBUG if (pxe_debug && unload_stack_p->Status != 0) printf("pxe_cleanup: UNLOAD_STACK failed %x\n", unload_stack_p->Status); #endif bio_free(unload_stack_p, sizeof(*unload_stack_p)); } } void pxe_perror(int err) { return; } void pxenv_call(int func, void *ptr) { #ifdef PXE_DEBUG if (pxe_debug) printf("pxenv_call %x\n", func); #endif bzero(&v86, sizeof(v86)); __pxenvseg = pxenv_p->RMEntry.segment; __pxenvoff = pxenv_p->RMEntry.offset; v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS; v86.es = VTOPSEG(ptr); v86.edi = VTOPOFF(ptr); v86.addr = (VTOPSEG(__pxenventry) << 16) | VTOPOFF(__pxenventry); v86.ebx = func; v86int(); v86.ctl = V86_FLAGS; } void bangpxe_call(int func, void *ptr) { #ifdef PXE_DEBUG if (pxe_debug) printf("bangpxe_call %x\n", func); #endif bzero(&v86, sizeof(v86)); __bangpxeseg = pxe_p->EntryPointSP.segment; __bangpxeoff = pxe_p->EntryPointSP.offset; v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS; v86.edx = VTOPSEG(ptr); v86.eax = VTOPOFF(ptr); v86.addr = (VTOPSEG(__bangpxeentry) << 16) | VTOPOFF(__bangpxeentry); v86.ebx = func; v86int(); v86.ctl = V86_FLAGS; } static int pxe_netif_match(struct netif *nif, void *machdep_hint) { return (1); } static int 
pxe_netif_probe(struct netif *nif, void *machdep_hint) { if (pxe_call == NULL) return (-1); return (0); } static void pxe_netif_end(struct netif *nif) { t_PXENV_UNDI_CLOSE *undi_close_p; undi_close_p = bio_alloc(sizeof(*undi_close_p)); if (undi_close_p != NULL) { bzero(undi_close_p, sizeof(*undi_close_p)); pxe_call(PXENV_UNDI_CLOSE, undi_close_p); if (undi_close_p->Status != 0) printf("undi close failed: %x\n", undi_close_p->Status); bio_free(undi_close_p, sizeof(*undi_close_p)); } } static void pxe_netif_init(struct iodesc *desc, void *machdep_hint) { t_PXENV_UNDI_GET_INFORMATION *undi_info_p; t_PXENV_UNDI_OPEN *undi_open_p; uint8_t *mac; int i, len; undi_info_p = bio_alloc(sizeof(*undi_info_p)); if (undi_info_p == NULL) return; bzero(undi_info_p, sizeof(*undi_info_p)); pxe_call(PXENV_UNDI_GET_INFORMATION, undi_info_p); if (undi_info_p->Status != 0) { printf("undi get info failed: %x\n", undi_info_p->Status); bio_free(undi_info_p, sizeof(*undi_info_p)); return; } /* Make sure the CurrentNodeAddress is valid. */ for (i = 0; i < undi_info_p->HwAddrLen; ++i) { if (undi_info_p->CurrentNodeAddress[i] != 0) break; } if (i < undi_info_p->HwAddrLen) { for (i = 0; i < undi_info_p->HwAddrLen; ++i) { if (undi_info_p->CurrentNodeAddress[i] != 0xff) break; } } if (i < undi_info_p->HwAddrLen) mac = undi_info_p->CurrentNodeAddress; else mac = undi_info_p->PermNodeAddress; len = min(sizeof (desc->myea), undi_info_p->HwAddrLen); for (i = 0; i < len; ++i) desc->myea[i] = mac[i]; if (bootp_response != NULL) desc->xid = bootp_response->bp_xid; else desc->xid = 0; bio_free(undi_info_p, sizeof(*undi_info_p)); undi_open_p = bio_alloc(sizeof(*undi_open_p)); if (undi_open_p == NULL) return; bzero(undi_open_p, sizeof(*undi_open_p)); undi_open_p->PktFilter = FLTR_DIRECTED | FLTR_BRDCST; pxe_call(PXENV_UNDI_OPEN, undi_open_p); if (undi_open_p->Status != 0) printf("undi open failed: %x\n", undi_open_p->Status); bio_free(undi_open_p, sizeof(*undi_open_p)); } static int pxe_netif_receive(void **pkt) { t_PXENV_UNDI_ISR *isr; char *buf, *ptr, *frame; size_t size, rsize; isr = bio_alloc(sizeof(*isr)); if (isr == NULL) return (-1); bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_START; pxe_call(PXENV_UNDI_ISR, isr); if (isr->Status != 0) { bio_free(isr, sizeof(*isr)); return (-1); } bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_PROCESS; pxe_call(PXENV_UNDI_ISR, isr); if (isr->Status != 0) { bio_free(isr, sizeof(*isr)); return (-1); } while (isr->FuncFlag == PXENV_UNDI_ISR_OUT_TRANSMIT) { /* * Wait till transmit is done. 
		 */
		bzero(isr, sizeof(*isr));
		isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT;
		pxe_call(PXENV_UNDI_ISR, isr);
		if (isr->Status != 0 ||
		    isr->FuncFlag == PXENV_UNDI_ISR_OUT_DONE) {
			bio_free(isr, sizeof(*isr));
			return (-1);
		}
	}

	while (isr->FuncFlag != PXENV_UNDI_ISR_OUT_RECEIVE) {
		if (isr->Status != 0 ||
		    isr->FuncFlag == PXENV_UNDI_ISR_OUT_DONE) {
			bio_free(isr, sizeof(*isr));
			return (-1);
		}
		bzero(isr, sizeof(*isr));
		isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT;
		pxe_call(PXENV_UNDI_ISR, isr);
	}

	size = isr->FrameLength;
	buf = malloc(size + ETHER_ALIGN);
	if (buf == NULL) {
		bio_free(isr, sizeof(*isr));
		return (-1);
	}
	ptr = buf + ETHER_ALIGN;
	rsize = 0;
	while (rsize < size) {
		frame = (char *)((uintptr_t)isr->Frame.segment << 4);
		frame += isr->Frame.offset;
		bcopy(PTOV(frame), ptr, isr->BufferLength);
		ptr += isr->BufferLength;
		rsize += isr->BufferLength;

		bzero(isr, sizeof(*isr));
		isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT;
		pxe_call(PXENV_UNDI_ISR, isr);
		if (isr->Status != 0) {
			bio_free(isr, sizeof(*isr));
			free(buf);
			return (-1);
		}
		/* Did we get another update? */
		if (isr->FuncFlag == PXENV_UNDI_ISR_OUT_RECEIVE)
			continue;
		break;
	}
	*pkt = buf;
	bio_free(isr, sizeof(*isr));
	return (rsize);
}

static ssize_t
pxe_netif_get(struct iodesc *desc, void **pkt, time_t timeout)
{
	time_t t;
	void *ptr;
	int ret = -1;

	t = getsecs();
	while ((getsecs() - t) < timeout) {
		ret = pxe_netif_receive(&ptr);
		if (ret != -1) {
			*pkt = ptr;
			break;
		}
	}
	return (ret);
}

static ssize_t
pxe_netif_put(struct iodesc *desc, void *pkt, size_t len)
{
	t_PXENV_UNDI_TRANSMIT *trans_p;
	t_PXENV_UNDI_TBD *tbd_p;
	char *data;
	ssize_t rv = -1;

	trans_p = bio_alloc(sizeof(*trans_p));
	tbd_p = bio_alloc(sizeof(*tbd_p));
	data = bio_alloc(len);

	if (trans_p != NULL && tbd_p != NULL && data != NULL) {
		bzero(trans_p, sizeof(*trans_p));
		bzero(tbd_p, sizeof(*tbd_p));

		trans_p->TBD.segment = VTOPSEG(tbd_p);
		trans_p->TBD.offset = VTOPOFF(tbd_p);

		tbd_p->ImmedLength = len;
		tbd_p->Xmit.segment = VTOPSEG(data);
		tbd_p->Xmit.offset = VTOPOFF(data);
		bcopy(pkt, data, len);

		pxe_call(PXENV_UNDI_TRANSMIT, trans_p);
		if (trans_p->Status == 0)
			rv = len;
	}

	bio_free(data, len);
	bio_free(tbd_p, sizeof(*tbd_p));
	bio_free(trans_p, sizeof(*trans_p));
	return (rv);
}
Index: head/stand/i386/libi386/pxe.h
===================================================================
--- head/stand/i386/libi386/pxe.h	(revision 344854)
+++ head/stand/i386/libi386/pxe.h	(revision 344855)
@@ -1,512 +1,511 @@
 /*
  * Copyright (c) 2000 Alfred Perlstein
  * All rights reserved.
  * Copyright (c) 2000 Paul Saab
  * All rights reserved.
  * Copyright (c) 2000 John Baldwin
- * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * The typedefs and structures declared in this file * clearly violate style(9), the reason for this is to conform to the * typedefs/structure-names used in the Intel literature to avoid confusion. * * It's for your own good. :) */ /* It seems that intel didn't think about ABI, * either that or 16bit ABI != 32bit ABI (which seems reasonable) * I have to thank Intel for the hair loss I incurred trying to figure * out why PXE was mis-reading structures I was passing it (at least * from my point of view) * * Solution: use gcc's '__packed' to correctly align * structures passed into PXE * Question: does this really work for PXE's expected ABI? */ #define PACKED __packed #define S_SIZE(s) s, sizeof(s) - 1 #define PXENFSROOTPATH "/pxeroot" typedef struct { uint16_t offset; uint16_t segment; } SEGOFF16_t; typedef struct { uint16_t Seg_Addr; uint32_t Phy_Addr; uint16_t Seg_Size; } SEGDESC_t; typedef uint16_t SEGSEL_t; typedef uint16_t PXENV_STATUS_t; typedef uint32_t IP4_t; typedef uint32_t ADDR32_t; typedef uint16_t UDP_PORT_t; #define MAC_ADDR_LEN 16 typedef uint8_t MAC_ADDR[MAC_ADDR_LEN]; /* PXENV+ */ typedef struct { uint8_t Signature[6]; /* 'PXENV+' */ uint16_t Version; /* MSB = major, LSB = minor */ uint8_t Length; /* structure length */ uint8_t Checksum; /* checksum pad */ SEGOFF16_t RMEntry; /* SEG:OFF to PXE entry point */ /* don't use PMOffset and PMSelector (from the 2.1 PXE manual) */ uint32_t PMOffset; /* Protected mode entry */ SEGSEL_t PMSelector; /* Protected mode selector */ SEGSEL_t StackSeg; /* Stack segment address */ uint16_t StackSize; /* Stack segment size (bytes) */ SEGSEL_t BC_CodeSeg; /* BC Code segment address */ uint16_t BC_CodeSize; /* BC Code segment size (bytes) */ SEGSEL_t BC_DataSeg; /* BC Data segment address */ uint16_t BC_DataSize; /* BC Data segment size (bytes) */ SEGSEL_t UNDIDataSeg; /* UNDI Data segment address */ uint16_t UNDIDataSize; /* UNDI Data segment size (bytes) */ SEGSEL_t UNDICodeSeg; /* UNDI Code segment address */ uint16_t UNDICodeSize; /* UNDI Code segment size (bytes) */ SEGOFF16_t PXEPtr; /* SEG:OFF to !PXE struct, only present when Version > 2.1 */ } PACKED pxenv_t; /* !PXE */ typedef struct { uint8_t Signature[4]; uint8_t StructLength; uint8_t StructCksum; uint8_t StructRev; uint8_t reserved_1; SEGOFF16_t UNDIROMID; SEGOFF16_t BaseROMID; SEGOFF16_t EntryPointSP; SEGOFF16_t EntryPointESP; SEGOFF16_t StatusCallout; uint8_t reserved_2; uint8_t SegDescCn; SEGSEL_t FirstSelector; SEGDESC_t Stack; SEGDESC_t UNDIData; SEGDESC_t UNDICode; SEGDESC_t UNDICodeWrite; SEGDESC_t BC_Data; SEGDESC_t BC_Code; SEGDESC_t BC_CodeWrite; } PACKED pxe_t; #define PXENV_START_UNDI 0x0000 typedef struct { PXENV_STATUS_t Status; uint16_t ax; uint16_t bx; uint16_t dx; uint16_t di; uint16_t es; } PACKED t_PXENV_START_UNDI; #define PXENV_UNDI_STARTUP 0x0001 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_STARTUP; #define PXENV_UNDI_CLEANUP 0x0002 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_CLEANUP; #define 
PXENV_UNDI_INITIALIZE 0x0003 typedef struct { PXENV_STATUS_t Status; ADDR32_t ProtocolIni; /* Phys addr of a copy of the driver module */ uint8_t reserved[8]; } PACKED t_PXENV_UNDI_INITIALIZE; #define MAXNUM_MCADDR 8 typedef struct { PXENV_STATUS_t Status; uint16_t MCastAddrCount; MAC_ADDR McastAddr[MAXNUM_MCADDR]; } PACKED t_PXENV_UNDI_MCAST_ADDRESS; #define PXENV_UNDI_RESET_ADAPTER 0x0004 typedef struct { PXENV_STATUS_t Status; t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf; } PACKED t_PXENV_UNDI_RESET; #define PXENV_UNDI_SHUTDOWN 0x0005 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_SHUTDOWN; #define PXENV_UNDI_OPEN 0x0006 typedef struct { PXENV_STATUS_t Status; uint16_t OpenFlag; uint16_t PktFilter; # define FLTR_DIRECTED 0x0001 # define FLTR_BRDCST 0x0002 # define FLTR_PRMSCS 0x0004 # define FLTR_SRC_RTG 0x0008 t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf; } PACKED t_PXENV_UNDI_OPEN; #define PXENV_UNDI_CLOSE 0x0007 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_CLOSE; #define PXENV_UNDI_TRANSMIT 0x0008 typedef struct { PXENV_STATUS_t Status; uint8_t Protocol; # define P_UNKNOWN 0 # define P_IP 1 # define P_ARP 2 # define P_RARP 3 uint8_t XmitFlag; # define XMT_DESTADDR 0x0000 # define XMT_BROADCAST 0x0001 SEGOFF16_t DestAddr; SEGOFF16_t TBD; uint32_t Reserved[2]; } PACKED t_PXENV_UNDI_TRANSMIT; #define MAX_DATA_BLKS 8 typedef struct { uint16_t ImmedLength; SEGOFF16_t Xmit; uint16_t DataBlkCount; struct DataBlk { uint8_t TDPtrType; uint8_t TDRsvdByte; uint16_t TDDataLen; SEGOFF16_t TDDataPtr; } DataBlock[MAX_DATA_BLKS]; } PACKED t_PXENV_UNDI_TBD; #define PXENV_UNDI_SET_MCAST_ADDRESS 0x0009 typedef struct { PXENV_STATUS_t Status; t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf; } PACKED t_PXENV_UNDI_SET_MCAST_ADDR; #define PXENV_UNDI_SET_STATION_ADDRESS 0x000A typedef struct { PXENV_STATUS_t Status; MAC_ADDR StationAddress; /* Temp MAC address to use */ } PACKED t_PXENV_UNDI_SET_STATION_ADDR; #define PXENV_UNDI_SET_PACKET_FILTER 0x000B typedef struct { PXENV_STATUS_t Status; uint8_t filter; /* see UNDI_OPEN (0x0006) */ } PACKED t_PXENV_UNDI_SET_PACKET_FILTER; #define PXENV_UNDI_GET_INFORMATION 0x000C typedef struct { PXENV_STATUS_t Status; uint16_t BaseIo; /* Adapter base I/O address */ uint16_t IntNumber; /* Adapter IRQ number */ uint16_t MaxTranUnit; /* Adapter maximum transmit unit */ uint16_t HwType; /* Type of protocol at the hardware addr */ # define ETHER_TYPE 1 # define EXP_ETHER_TYPE 2 # define IEEE_TYPE 6 uint16_t HwAddrLen; /* Length of hardware address */ MAC_ADDR CurrentNodeAddress; /* Current hardware address */ MAC_ADDR PermNodeAddress; /* Permanent hardware address */ SEGSEL_t ROMAddress; /* Real mode ROM segment address */ uint16_t RxBufCt; /* Receive queue length */ uint16_t TxBufCt; /* Transmit queue length */ } PACKED t_PXENV_UNDI_GET_INFORMATION; #define PXENV_UNDI_GET_STATISTICS 0x000D typedef struct { PXENV_STATUS_t Status; uint32_t XmitGoodFrames; /* Number of successful transmissions */ uint32_t RcvGoodFrames; /* Number of good frames received */ uint32_t RcvCRCErrors; /* Number of frames with CRC errors */ uint32_t RcvResourceErrors; /* Number of frames dropped */ } PACKED t_PXENV_UNDI_GET_STATISTICS; #define PXENV_UNDI_CLEAR_STATISTICS 0x000E typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_CLEAR_STATISTICS; #define PXENV_UNDI_INITIATE_DIAGS 0x000F typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_INITIATE_DIAGS; #define PXENV_UNDI_FORCE_INTERRUPT 0x0010 typedef struct { PXENV_STATUS_t Status; } PACKED 
t_PXENV_UNDI_FORCE_INTERRUPT; #define PXENV_UNDI_GET_MCAST_ADDRESS 0x0011 typedef struct { PXENV_STATUS_t Status; IP4_t InetAddr; /* IP mulicast address */ MAC_ADDR MediaAddr; /* MAC multicast address */ } PACKED t_PXENV_UNDI_GET_MCAST_ADDR; #define PXENV_UNDI_GET_NIC_TYPE 0x0012 typedef struct { PXENV_STATUS_t Status; uint8_t NicType; /* Type of NIC */ # define PCI_NIC 2 # define PnP_NIC 3 # define CardBus_NIC 4 union { struct { uint16_t Vendor_ID; uint16_t Dev_ID; uint8_t Base_Class; uint8_t Sub_Class; uint8_t Prog_Intf; uint8_t Rev; uint16_t BusDevFunc; uint16_t SubVendor_ID; uint16_t SubDevice_ID; } pci, cardbus; struct { uint32_t EISA_Dev_ID; uint8_t Base_Class; uint8_t Sub_Class; uint8_t Prog_Intf; uint16_t CardSelNum; } pnp; } info; } PACKED t_PXENV_UNDI_GET_NIC_TYPE; #define PXENV_UNDI_GET_IFACE_INFO 0x0013 typedef struct { PXENV_STATUS_t Status; uint8_t IfaceType[16]; /* Name of MAC type in ASCII. */ uint32_t LinkSpeed; /* Defined in NDIS 2.0 spec */ uint32_t ServiceFlags; /* Defined in NDIS 2.0 spec */ uint32_t Reserved[4]; /* must be 0 */ } PACKED t_PXENV_UNDI_GET_NDIS_INFO; #define PXENV_UNDI_ISR 0x0014 typedef struct { PXENV_STATUS_t Status; uint16_t FuncFlag; /* PXENV_UNDI_ISR_OUT_xxx */ uint16_t BufferLength; /* Length of Frame */ uint16_t FrameLength; /* Total length of receiver frame */ uint16_t FrameHeaderLength; /* Length of the media header in Frame */ SEGOFF16_t Frame; /* receive buffer */ uint8_t ProtType; /* Protocol type */ uint8_t PktType; /* Packet Type */ # define PXENV_UNDI_ISR_IN_START 1 # define PXENV_UNDI_ISR_IN_PROCESS 2 # define PXENV_UNDI_ISR_IN_GET_NEXT 3 /* one of these will be returned for PXENV_UNDI_ISR_IN_START */ # define PXENV_UNDI_ISR_OUT_OURS 0 # define PXENV_UNDI_ISR_OUT_NOT_OUTS 1 /* * one of these will bre returned for PXEND_UNDI_ISR_IN_PROCESS * and PXENV_UNDI_ISR_IN_GET_NEXT */ # define PXENV_UNDI_ISR_OUT_DONE 0 # define PXENV_UNDI_ISR_OUT_TRANSMIT 2 # define PXENV_UNDI_ISR_OUT_RECEIVE 3 # define PXENV_UNDI_ISR_OUT_BUSY 4 } PACKED t_PXENV_UNDI_ISR; #define PXENV_STOP_UNDI 0x0015 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_STOP_UNDI; #define PXENV_TFTP_OPEN 0x0020 typedef struct { PXENV_STATUS_t Status; IP4_t ServerIPAddress; IP4_t GatewayIPAddress; uint8_t FileName[128]; UDP_PORT_t TFTPPort; uint16_t PacketSize; } PACKED t_PXENV_TFTP_OPEN; #define PXENV_TFTP_CLOSE 0x0021 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_TFTP_CLOSE; #define PXENV_TFTP_READ 0x0022 typedef struct { PXENV_STATUS_t Status; uint16_t PacketNumber; uint16_t BufferSize; SEGOFF16_t Buffer; } PACKED t_PXENV_TFTP_READ; #define PXENV_TFTP_READ_FILE 0x0023 typedef struct { PXENV_STATUS_t Status; uint8_t FileName[128]; uint32_t BufferSize; ADDR32_t Buffer; IP4_t ServerIPAddress; IP4_t GatewayIPAdress; IP4_t McastIPAdress; UDP_PORT_t TFTPClntPort; UDP_PORT_t TFTPSrvPort; uint16_t TFTPOpenTimeOut; uint16_t TFTPReopenDelay; } PACKED t_PXENV_TFTP_READ_FILE; #define PXENV_TFTP_GET_FSIZE 0x0025 typedef struct { PXENV_STATUS_t Status; IP4_t ServerIPAddress; IP4_t GatewayIPAdress; uint8_t FileName[128]; uint32_t FileSize; } PACKED t_PXENV_TFTP_GET_FSIZE; #define PXENV_UDP_OPEN 0x0030 typedef struct { PXENV_STATUS_t status; IP4_t src_ip; /* IP address of this station */ } PACKED t_PXENV_UDP_OPEN; #define PXENV_UDP_CLOSE 0x0031 typedef struct { PXENV_STATUS_t status; } PACKED t_PXENV_UDP_CLOSE; #define PXENV_UDP_READ 0x0032 typedef struct { PXENV_STATUS_t status; IP4_t src_ip; /* IP of sender */ IP4_t dest_ip; /* Only accept packets sent to this IP */ UDP_PORT_t 
s_port; /* UDP source port of sender */ UDP_PORT_t d_port; /* Only accept packets sent to this port */ uint16_t buffer_size; /* Size of the packet buffer */ SEGOFF16_t buffer; /* SEG:OFF to the packet buffer */ } PACKED t_PXENV_UDP_READ; #define PXENV_UDP_WRITE 0x0033 typedef struct { PXENV_STATUS_t status; IP4_t ip; /* dest ip addr */ IP4_t gw; /* ip gateway */ UDP_PORT_t src_port; /* source udp port */ UDP_PORT_t dst_port; /* destination udp port */ uint16_t buffer_size; /* Size of the packet buffer */ SEGOFF16_t buffer; /* SEG:OFF to the packet buffer */ } PACKED t_PXENV_UDP_WRITE; #define PXENV_UNLOAD_STACK 0x0070 typedef struct { PXENV_STATUS_t Status; uint8_t reserved[10]; } PACKED t_PXENV_UNLOAD_STACK; #define PXENV_GET_CACHED_INFO 0x0071 typedef struct { PXENV_STATUS_t Status; uint16_t PacketType; /* type (defined right here) */ # define PXENV_PACKET_TYPE_DHCP_DISCOVER 1 # define PXENV_PACKET_TYPE_DHCP_ACK 2 # define PXENV_PACKET_TYPE_BINL_REPLY 3 uint16_t BufferSize; /* max to copy, leave at 0 for pointer */ SEGOFF16_t Buffer; /* copy to, leave at 0 for pointer */ uint16_t BufferLimit; /* max size of buffer in BC dataseg ? */ } PACKED t_PXENV_GET_CACHED_INFO; /* structure filled in by PXENV_GET_CACHED_INFO * (how we determine which IP we downloaded the initial bootstrap from) * words can't describe... */ typedef struct { uint8_t opcode; # define BOOTP_REQ 1 # define BOOTP_REP 2 uint8_t Hardware; /* hardware type */ uint8_t Hardlen; /* hardware addr len */ uint8_t Gatehops; /* zero it */ uint32_t ident; /* random number chosen by client */ uint16_t seconds; /* seconds since did initial bootstrap */ uint16_t Flags; /* seconds since did initial bootstrap */ # define BOOTP_BCAST 0x8000 /* ? */ IP4_t cip; /* Client IP */ IP4_t yip; /* Your IP */ IP4_t sip; /* IP to use for next boot stage */ IP4_t gip; /* Relay IP ? */ MAC_ADDR CAddr; /* Client hardware address */ uint8_t Sname[64]; /* Server's hostname (Optional) */ uint8_t bootfile[128]; /* boot filename */ union { # if 1 # define BOOTP_DHCPVEND 1024 /* DHCP extended vendor field size */ # else # define BOOTP_DHCPVEND 312 /* DHCP standard vendor field size */ # endif uint8_t d[BOOTP_DHCPVEND]; /* raw array of vendor/dhcp options */ struct { uint8_t magic[4]; /* DHCP magic cookie */ # ifndef VM_RFC1048 # define VM_RFC1048 0x63825363L /* ? */ # endif uint32_t flags; /* bootp flags/opcodes */ uint8_t pad[56]; /* I don't think intel knows what a union does... */ } v; } vendor; } PACKED BOOTPLAYER; #define PXENV_RESTART_TFTP 0x0073 #define t_PXENV_RESTART_TFTP t_PXENV_TFTP_READ_FILE #define PXENV_START_BASE 0x0075 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_START_BASE; #define PXENV_STOP_BASE 0x0076 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_STOP_BASE; Index: head/stand/i386/pxeldr/pxeldr.S =================================================================== --- head/stand/i386/pxeldr/pxeldr.S (revision 344854) +++ head/stand/i386/pxeldr/pxeldr.S (revision 344855) @@ -1,301 +1,300 @@ /* * Copyright (c) 2000 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This simple program is a preloader for the normal boot3 loader. It is simply * prepended to the beginning of a fully built and btxld'd loader. It then * copies the loader to the address boot2 normally loads it, emulates the * boot[12] environment (protected mode, a bootinfo struct, etc.), and then jumps * to the start of btxldr to start the boot process. This method allows a stock * /boot/loader to be booted over the network via PXE w/o having to write a * separate PXE-aware client just to load the loader. */ #include #include /* * Memory locations. */ .set MEM_PAGE_SIZE,0x1000 # memory page size, 4k .set MEM_ARG,0x900 # Arguments at start .set MEM_ARG_BTX,0xa100 # Where we move them to so the # BTX client can see them .set MEM_ARG_SIZE,0x18 # Size of the arguments .set MEM_BTX_ADDRESS,0x9000 # where BTX lives .set MEM_BTX_ENTRY,0x9010 # where BTX starts to execute .set MEM_BTX_OFFSET,MEM_PAGE_SIZE # offset of BTX in the loader .set MEM_BTX_CLIENT,0xa000 # where BTX clients live .set MEM_BIOS_KEYBOARD,0x496 # BDA byte with keyboard bit /* * a.out header fields */ .set AOUT_TEXT,0x04 # text segment size .set AOUT_DATA,0x08 # data segment size .set AOUT_BSS,0x0c # zero'd BSS size .set AOUT_SYMBOLS,0x10 # symbol table .set AOUT_ENTRY,0x14 # entry point .set AOUT_HEADER,MEM_PAGE_SIZE # size of the a.out header /* * Segment selectors. 
*/ .set SEL_SDATA,0x8 # Supervisor data .set SEL_RDATA,0x10 # Real mode data .set SEL_SCODE,0x18 # PM-32 code .set SEL_SCODE16,0x20 # PM-16 code /* * BTX constants */ .set INT_SYS,0x30 # BTX syscall interrupt /* * Bit in MEM_BIOS_KEYBOARD that is set if an enhanced keyboard is present */ .set KEYBOARD_BIT,0x10 /* * We expect to be loaded by the BIOS at 0x7c00 (standard boot loader entry * point) */ .code16 .globl start .org 0x0, 0x0 /* * BTX program loader for PXE network booting */ start: cld # string ops inc xorw %ax, %ax # zero %ax movw %ax, %ss # setup the movw $start, %sp # stack movw %es, %cx # save PXENV+ segment movw %ax, %ds # setup the movw %ax, %es # data segments andl $0xffff, %ecx # clear upper words andl $0xffff, %ebx # of %ebx and %ecx shll $4, %ecx # calculate the offset of addl %ebx, %ecx # the PXENV+ struct and pushl %ecx # save it on the stack movw $welcome_msg, %si # %ds:(%si) -> welcome message callw putstr # display the welcome message /* * Setup the arguments that the loader is expecting from boot[12] */ movw $bootinfo_msg, %si # %ds:(%si) -> boot args message callw putstr # display the message movw $MEM_ARG, %bx # %ds:(%bx) -> boot args movw %bx, %di # %es:(%di) -> boot args xorl %eax, %eax # zero %eax movw $(MEM_ARG_SIZE/4), %cx # Size of arguments in 32-bit # dwords rep # Clear the arguments stosl # to zero orb $KARGS_FLAGS_PXE, 0x8(%bx) # kargs->bootflags |= # KARGS_FLAGS_PXE popl 0xc(%bx) # kargs->pxeinfo = *PXENV+ #ifdef ALWAYS_SERIAL /* * set the RBX_SERIAL bit in the howto byte. */ orl $RB_SERIAL, (%bx) # enable serial console #endif #ifdef PROBE_KEYBOARD /* * Look at the BIOS data area to see if we have an enhanced keyboard. If not, * set the RBX_DUAL and RBX_SERIAL bits in the howto byte. */ testb $KEYBOARD_BIT, MEM_BIOS_KEYBOARD # keyboard present? 
jnz keyb # yes, so skip orl $(RB_MULTIPLE | RB_SERIAL), (%bx) # enable serial console keyb: #endif /* * Turn on the A20 address line */ callw seta20 # Turn A20 on /* * Relocate the loader and BTX using a very lazy protected mode */ movw $relocate_msg, %si # Display the callw putstr # relocation message movl end+AOUT_ENTRY, %edi # %edi is the destination movl $(end+AOUT_HEADER), %esi # %esi is # the start of the text # segment movl end+AOUT_TEXT, %ecx # %ecx = length of the text # segment lgdt gdtdesc # setup our own gdt cli # turn off interrupts movl %cr0, %eax # Turn on orb $0x1, %al # protected movl %eax, %cr0 # mode ljmp $SEL_SCODE,$pm_start # long jump to clear the # instruction pre-fetch queue .code32 pm_start: movw $SEL_SDATA, %ax # Initialize movw %ax, %ds # %ds and movw %ax, %es # %es to a flat selector rep # Relocate the movsb # text segment addl $(MEM_PAGE_SIZE - 1), %edi # pad %edi out to a new page andl $~(MEM_PAGE_SIZE - 1), %edi # for the data segment movl end+AOUT_DATA, %ecx # size of the data segment rep # Relocate the movsb # data segment movl end+AOUT_BSS, %ecx # size of the bss xorl %eax, %eax # zero %eax addb $3, %cl # round %ecx up to shrl $2, %ecx # a multiple of 4 rep # zero the stosl # bss movl end+AOUT_ENTRY, %esi # %esi -> relocated loader addl $MEM_BTX_OFFSET, %esi # %esi -> BTX in the loader movl $MEM_BTX_ADDRESS, %edi # %edi -> where BTX needs to go movzwl 0xa(%esi), %ecx # %ecx -> length of BTX rep # Relocate movsb # BTX ljmp $SEL_SCODE16,$pm_16 # Jump to 16-bit PM .code16 pm_16: movw $SEL_RDATA, %ax # Initialize movw %ax, %ds # %ds and movw %ax, %es # %es to a real mode selector movl %cr0, %eax # Turn off andb $~0x1, %al # protected movl %eax, %cr0 # mode ljmp $0,$pm_end # Long jump to clear the # instruction pre-fetch queue pm_end: sti # Turn interrupts back on now /* * Copy the BTX client to MEM_BTX_CLIENT */ xorw %ax, %ax # zero %ax and set movw %ax, %ds # %ds and %es movw %ax, %es # to segment 0 movw $MEM_BTX_CLIENT, %di # Prepare to relocate movw $btx_client, %si # the simple btx client movw $(btx_client_end-btx_client), %cx # length of btx client rep # Relocate the movsb # simple BTX client /* * Copy the boot[12] args to where the BTX client can see them */ movw $MEM_ARG, %si # where the args are at now movw $MEM_ARG_BTX, %di # where the args are moving to movw $(MEM_ARG_SIZE/4), %cx # size of the arguments in longs rep # Relocate movsl # the words /* * Save the entry point so the client can get to it later on */ movl end+AOUT_ENTRY, %eax # load the entry point stosl # add it to the end of the # arguments /* * Now we just start up BTX and let it do the rest */ movw $jump_message, %si # Display the callw putstr # jump message ljmp $0,$MEM_BTX_ENTRY # Jump to the BTX entry point /* * Display a null-terminated string */ putstr: lodsb # load %al from %ds:(%si) testb %al,%al # stop at null jnz putc # if the char != null, output it retw # return when null is hit putc: movw $0x7,%bx # attribute for output movb $0xe,%ah # BIOS: put_char int $0x10 # call BIOS, print char in %al jmp putstr # keep looping /* * Enable A20. Put an upper limit on the amount of time we wait for the * keyboard controller to get ready (65K x ISA access time). If * we wait more than that amount, the hardware is probably * legacy-free and simply doesn't have a keyboard controller. * Thus, the A20 line is already enabled. */ seta20: cli # Disable interrupts xor %cx,%cx # Clear seta20.1: inc %cx # Increment, overflow? jz seta20.3 # Yes inb $0x64,%al # Get status testb $0x2,%al # Busy? 
jnz seta20.1 # Yes movb $0xd1,%al # Command: Write outb %al,$0x64 # output port seta20.2: inb $0x64,%al # Get status testb $0x2,%al # Busy? jnz seta20.2 # Yes movb $0xdf,%al # Enable outb %al,$0x60 # A20 seta20.3: sti # Enable interrupts retw # To caller /* * BTX client to start btxldr */ .code32 btx_client: movl $(MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE-4), %esi # %ds:(%esi) -> end # of boot[12] args movl $(MEM_ARG_SIZE/4), %ecx # Number of words to push std # Go backwards push_arg: lodsl # Read argument pushl %eax # Push it onto the stack loop push_arg # Push all of the arguments cld # In case anyone depends on this pushl MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE # Entry point of # the loader pushl %eax # Emulate a near call movl $0x1, %eax # 'exec' system call int $INT_SYS # BTX system call btx_client_end: .code16 .p2align 4 /* * Global descriptor table. */ gdt: .word 0x0,0x0,0x0,0x0 # Null entry .word 0xffff,0x0,0x9200,0xcf # SEL_SDATA .word 0xffff,0x0,0x9200,0x0 # SEL_RDATA .word 0xffff,0x0,0x9a00,0xcf # SEL_SCODE (32-bit) .word 0xffff,0x0,0x9a00,0x8f # SEL_SCODE16 (16-bit) gdt.1: /* * Pseudo-descriptors. */ gdtdesc: .word gdt.1-gdt-1 # Limit .long gdt # Base welcome_msg: .asciz "PXE Loader 1.00\r\n\n" bootinfo_msg: .asciz "Building the boot loader arguments\r\n" relocate_msg: .asciz "Relocating the loader and the BTX\r\n" jump_message: .asciz "Starting the BTX loader\r\n" .p2align 4 end: Index: head/sys/amd64/include/intr_machdep.h =================================================================== --- head/sys/amd64/include/intr_machdep.h (revision 344854) +++ head/sys/amd64/include/intr_machdep.h (revision 344855) @@ -1,45 +1,44 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #include /* * The following data structure holds per-cpu data, and is placed just * above the top of the space used for the NMI and MC# stacks. 
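 */

The placement described above is easy to mirror in C: the structure occupies the last 16 bytes of the stack region, so the kernel can find it from the stack top. Below is a userland sketch of the same pointer math, illustrative only and not the kernel's own code; the stack size and stored value are made up.

#include <stdint.h>
#include <stdio.h>

struct nmi_pcpu {
	uintptr_t np_pcpu;
	uintptr_t __padding;	/* pad to 16 bytes on LP64 */
};

static char nmi_stack[8192] __attribute__((aligned(16)));

int
main(void)
{
	/* The per-CPU area sits in the top sizeof(struct nmi_pcpu) bytes. */
	struct nmi_pcpu *np =
	    (struct nmi_pcpu *)&nmi_stack[sizeof(nmi_stack)] - 1;

	np->np_pcpu = 0xdeadbeef;	/* stand-in for the pcpu pointer */
	printf("stack top %p, np at %p\n",
	    (void *)&nmi_stack[sizeof(nmi_stack)], (void *)np);
	return (0);
}

/*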
*/ struct nmi_pcpu { register_t np_pcpu; register_t __padding; /* pad to 16 bytes */ }; #endif /* !__MACHINE_INTR_MACHDEP_H__ */ Index: head/sys/arm/arm/ptrace_machdep.c =================================================================== --- head/sys/arm/arm/ptrace_machdep.c (revision 344854) +++ head/sys/arm/arm/ptrace_machdep.c (revision 344855) @@ -1,63 +1,62 @@ /*- * Copyright (c) 2017 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #ifdef VFP #include #endif int cpu_ptrace(struct thread *td, int req, void *addr, int data) { #ifdef VFP mcontext_vfp_t vfp; #endif int error; switch (req) { #ifdef VFP case PT_GETVFPREGS: get_vfpcontext(td, &vfp); error = copyout(&vfp, addr, sizeof(vfp)); break; case PT_SETVFPREGS: error = copyin(addr, &vfp, sizeof(vfp)); if (error == 0) set_vfpcontext(td, &vfp); break; #endif default: error = EINVAL; } return (error); } Index: head/sys/dev/acpica/acpi_isab.c =================================================================== --- head/sys/dev/acpica/acpi_isab.c (revision 344854) +++ head/sys/dev/acpica/acpi_isab.c (revision 344855) @@ -1,131 +1,130 @@ /*- * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * ISA Bridge driver for Generic ISA Bus Devices. See section 10.7 of the * ACPI 2.0a specification for details on this device. */ #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include /* Hooks for the ACPI CA debugging infrastructure. */ #define _COMPONENT ACPI_BUS ACPI_MODULE_NAME("ISA_ACPI") struct acpi_isab_softc { device_t ap_dev; ACPI_HANDLE ap_handle; }; static int acpi_isab_probe(device_t bus); static int acpi_isab_attach(device_t bus); static int acpi_isab_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); static device_method_t acpi_isab_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_isab_probe), DEVMETHOD(device_attach, acpi_isab_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, acpi_isab_read_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD_END }; static driver_t acpi_isab_driver = { "isab", acpi_isab_methods, sizeof(struct acpi_isab_softc), }; DRIVER_MODULE(acpi_isab, acpi, acpi_isab_driver, isab_devclass, 0, 0); MODULE_DEPEND(acpi_isab, acpi, 1, 1, 1); static int acpi_isab_probe(device_t dev) { static char *isa_ids[] = { "PNP0A05", "PNP0A06", NULL }; int rv; if (acpi_disabled("isab") || devclass_get_device(isab_devclass, 0) != dev) return (ENXIO); rv = ACPI_ID_PROBE(device_get_parent(dev), dev, isa_ids, NULL); if (rv <= 0) device_set_desc(dev, "ACPI Generic ISA bridge"); return (rv); } static int acpi_isab_attach(device_t dev) { struct acpi_isab_softc *sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc(dev); sc->ap_dev = dev; sc->ap_handle = acpi_get_handle(dev); return (isab_attach(dev)); } static int acpi_isab_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct acpi_isab_softc *sc = device_get_softc(dev); switch (which) { case ACPI_IVAR_HANDLE: *result = (uintptr_t)sc->ap_handle; return (0); } return (ENOENT); } Index: head/sys/dev/acpica/acpi_pcivar.h =================================================================== --- head/sys/dev/acpica/acpi_pcivar.h (revision 344854) +++ head/sys/dev/acpica/acpi_pcivar.h (revision 344855) @@ -1,38 +1,37 @@ /*- * Copyright (c) 2016 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ACPI_PCIVAR_H_ #define _ACPI_PCIVAR_H_ #ifdef _KERNEL void acpi_pci_child_added(device_t dev, device_t child); #endif #endif /* !_ACPI_PCIVAR_H_ */ Index: head/sys/dev/pci/vga_pci.c =================================================================== --- head/sys/dev/pci/vga_pci.c (revision 344854) +++ head/sys/dev/pci/vga_pci.c (revision 344855) @@ -1,685 +1,684 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Simple driver for PCI VGA display devices. Drivers such as agp(4) and * drm(4) should attach as children of this device. * * XXX: The vgapci name is a hack until we somehow merge the isa vga driver * in or rename it. */ #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include #include #endif #include #include #include /* To re-POST the card. */ struct vga_resource { struct resource *vr_res; int vr_refs; }; struct vga_pci_softc { device_t vga_msi_child; /* Child driver using MSI. 
*/ struct vga_resource vga_bars[PCIR_MAX_BAR_0 + 1]; struct vga_resource vga_bios; }; SYSCTL_DECL(_hw_pci); static struct vga_resource *lookup_res(struct vga_pci_softc *sc, int rid); static struct resource *vga_pci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); static int vga_pci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r); int vga_pci_default_unit = -1; SYSCTL_INT(_hw_pci, OID_AUTO, default_vgapci_unit, CTLFLAG_RDTUN, &vga_pci_default_unit, -1, "Default VGA-compatible display"); int vga_pci_is_boot_display(device_t dev) { int unit; device_t pcib; uint16_t config; /* Check that the given device is a video card */ if ((pci_get_class(dev) != PCIC_DISPLAY && (pci_get_class(dev) != PCIC_OLD || pci_get_subclass(dev) != PCIS_OLD_VGA))) return (0); unit = device_get_unit(dev); if (vga_pci_default_unit >= 0) { /* * The boot display device was determined by a previous * call to this function, or the user forced it using * the hw.pci.default_vgapci_unit tunable. */ return (vga_pci_default_unit == unit); } /* * The primary video card used as a boot display must have the * "I/O" and "Memory Address Space Decoding" bits set in its * Command register. * * Furthermore, if the card is attached to a bridge, instead of * the root PCI bus, the bridge must have the "VGA Enable" bit * set in its Control register. */ pcib = device_get_parent(device_get_parent(dev)); if (device_get_devclass(device_get_parent(pcib)) == devclass_find("pci")) { /* * The parent bridge is a PCI-to-PCI bridge: check the * value of the "VGA Enable" bit. */ config = pci_read_config(pcib, PCIR_BRIDGECTL_1, 2); if ((config & PCIB_BCR_VGA_ENABLE) == 0) return (0); } config = pci_read_config(dev, PCIR_COMMAND, 2); if ((config & (PCIM_CMD_PORTEN | PCIM_CMD_MEMEN)) == 0) return (0); /* * Disable interrupts until a chipset driver is loaded for * this PCI device. Else unhandled display adapter interrupts * might freeze the CPU. */ pci_write_config(dev, PCIR_COMMAND, config | PCIM_CMD_INTxDIS, 2); /* This video card is the boot display: record its unit number. */ vga_pci_default_unit = unit; device_set_flags(dev, 1); return (1); } void * vga_pci_map_bios(device_t dev, size_t *size) { int rid; struct resource *res; #if defined(__amd64__) || defined(__i386__) if (vga_pci_is_boot_display(dev)) { /* * On x86, the System BIOS copy the default display * device's Video BIOS at a fixed location in system * memory (0xC0000, 128 kBytes long) at boot time. * * We use this copy for the default boot device, because * the original ROM may not be valid after boot. */ *size = VGA_PCI_BIOS_SHADOW_SIZE; return (pmap_mapbios(VGA_PCI_BIOS_SHADOW_ADDR, *size)); } #endif rid = PCIR_BIOS; res = vga_pci_alloc_resource(dev, NULL, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (res == NULL) { return (NULL); } *size = rman_get_size(res); return (rman_get_virtual(res)); } void vga_pci_unmap_bios(device_t dev, void *bios) { struct vga_resource *vr; if (bios == NULL) { return; } #if defined(__amd64__) || defined(__i386__) if (vga_pci_is_boot_display(dev)) { /* We mapped the BIOS shadow copy located at 0xC0000. */ pmap_unmapdev((vm_offset_t)bios, VGA_PCI_BIOS_SHADOW_SIZE); return; } #endif /* * Look up the PCIR_BIOS resource in our softc. It should match * the address we returned previously. 
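[Usage note for vga_pci_map_bios()/vga_pci_unmap_bios() above: a child of vgapci(4) maps the ROM through its parent and must hand the same pointer back through the matching helper. Hedged sketch in kernel context, prototypes as given in this file; the 0x55 0xaa option-ROM signature check and the helper name are illustrative.]

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>

static int
probe_vbios(device_t child)
{
	device_t vga = device_get_parent(child);
	u_char *bios;
	size_t size;

	bios = vga_pci_map_bios(vga, &size);
	if (bios == NULL)
		return (ENXIO);
	/* PCI option ROMs begin with the 0x55 0xaa signature. */
	if (size < 2 || bios[0] != 0x55 || bios[1] != 0xaa) {
		vga_pci_unmap_bios(vga, bios);
		return (ENXIO);
	}
	/* ... parse ROM contents here ... */
	vga_pci_unmap_bios(vga, bios);
	return (0);
}

[On x86 the returned pointer may be the 0xC0000 shadow copy rather than the BAR mapping, which is why the unmap must go through the same helper rather than a raw resource release.]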
*/ vr = lookup_res(device_get_softc(dev), PCIR_BIOS); KASSERT(vr->vr_res != NULL, ("vga_pci_unmap_bios: bios not mapped")); KASSERT(rman_get_virtual(vr->vr_res) == bios, ("vga_pci_unmap_bios: mismatch")); vga_pci_release_resource(dev, NULL, SYS_RES_MEMORY, PCIR_BIOS, vr->vr_res); } int vga_pci_repost(device_t dev) { #if defined(__amd64__) || defined(__i386__) x86regs_t regs; if (!vga_pci_is_boot_display(dev)) return (EINVAL); if (x86bios_get_orm(VGA_PCI_BIOS_SHADOW_ADDR) == NULL) return (ENOTSUP); x86bios_init_regs(®s); regs.R_AH = pci_get_bus(dev); regs.R_AL = (pci_get_slot(dev) << 3) | (pci_get_function(dev) & 0x07); regs.R_DL = 0x80; device_printf(dev, "REPOSTing\n"); x86bios_call(®s, X86BIOS_PHYSTOSEG(VGA_PCI_BIOS_SHADOW_ADDR + 3), X86BIOS_PHYSTOOFF(VGA_PCI_BIOS_SHADOW_ADDR + 3)); x86bios_get_intr(0x10); return (0); #else return (ENOTSUP); #endif } static int vga_pci_probe(device_t dev) { switch (pci_get_class(dev)) { case PCIC_DISPLAY: break; case PCIC_OLD: if (pci_get_subclass(dev) != PCIS_OLD_VGA) return (ENXIO); break; default: return (ENXIO); } /* Probe default display. */ vga_pci_is_boot_display(dev); device_set_desc(dev, "VGA-compatible display"); return (BUS_PROBE_GENERIC); } static int vga_pci_attach(device_t dev) { bus_generic_probe(dev); /* Always create a drm child for now to make it easier on drm. */ device_add_child(dev, "drm", -1); device_add_child(dev, "drmn", -1); bus_generic_attach(dev); if (vga_pci_is_boot_display(dev)) device_printf(dev, "Boot video device\n"); return (0); } static int vga_pci_suspend(device_t dev) { return (bus_generic_suspend(dev)); } static int vga_pci_detach(device_t dev) { int error; error = bus_generic_detach(dev); if (error == 0) error = device_delete_children(dev); return (error); } static int vga_pci_resume(device_t dev) { return (bus_generic_resume(dev)); } /* Bus interface. */ static int vga_pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { return (BUS_READ_IVAR(device_get_parent(dev), dev, which, result)); } static int vga_pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { return (EINVAL); } static int vga_pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep) { return (BUS_SETUP_INTR(device_get_parent(dev), dev, irq, flags, filter, intr, arg, cookiep)); } static int vga_pci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { return (BUS_TEARDOWN_INTR(device_get_parent(dev), dev, irq, cookie)); } static struct vga_resource * lookup_res(struct vga_pci_softc *sc, int rid) { int bar; if (rid == PCIR_BIOS) return (&sc->vga_bios); bar = PCI_RID2BAR(rid); if (bar >= 0 && bar <= PCIR_MAX_BAR_0) return (&sc->vga_bars[bar]); return (NULL); } static struct resource * vga_pci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct vga_resource *vr; switch (type) { case SYS_RES_MEMORY: case SYS_RES_IOPORT: /* * For BARs, we cache the resource so that we only allocate it * from the PCI bus once. 
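[The BAR caching described in the comment above keys each allocation by BAR index, with one extra slot for the ROM. A sketch of the mapping lookup_res() relies on; PCI_RID2BAR() and the PCIR_* constants come from <dev/pci/pcireg.h>, and the expansion shown is my reading of that macro.]

/*
 * Config-space offset -> cache slot:
 *   0x10 -> BAR 0, 0x14 -> BAR 1, ..., 0x24 -> BAR 5 (PCIR_MAX_BAR_0);
 *   PCIR_BIOS selects the separate vga_bios slot.
 */
static int
rid_to_bar(int rid)
{
	return ((rid - PCIR_BARS) / 4);		/* == PCI_RID2BAR(rid) */
}

[With this, two children requesting the same BAR share one struct resource, and vr_refs ensures only the final release reaches the PCI bus.]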
*/ vr = lookup_res(device_get_softc(dev), *rid); if (vr == NULL) return (NULL); if (vr->vr_res == NULL) vr->vr_res = bus_alloc_resource(dev, type, rid, start, end, count, flags); if (vr->vr_res != NULL) vr->vr_refs++; return (vr->vr_res); } return (bus_alloc_resource(dev, type, rid, start, end, count, flags)); } static int vga_pci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct vga_resource *vr; int error; switch (type) { case SYS_RES_MEMORY: case SYS_RES_IOPORT: /* * For BARs, we release the resource from the PCI bus * when the last child reference goes away. */ vr = lookup_res(device_get_softc(dev), rid); if (vr == NULL) return (EINVAL); if (vr->vr_res == NULL) return (EINVAL); KASSERT(vr->vr_res == r, ("vga_pci resource mismatch")); if (vr->vr_refs > 1) { vr->vr_refs--; return (0); } KASSERT(vr->vr_refs > 0, ("vga_pci resource reference count underflow")); error = bus_release_resource(dev, type, rid, r); if (error == 0) { vr->vr_res = NULL; vr->vr_refs = 0; } return (error); } return (bus_release_resource(dev, type, rid, r)); } /* PCI interface. */ static uint32_t vga_pci_read_config(device_t dev, device_t child, int reg, int width) { return (pci_read_config(dev, reg, width)); } static void vga_pci_write_config(device_t dev, device_t child, int reg, uint32_t val, int width) { pci_write_config(dev, reg, val, width); } static int vga_pci_enable_busmaster(device_t dev, device_t child) { return (pci_enable_busmaster(dev)); } static int vga_pci_disable_busmaster(device_t dev, device_t child) { return (pci_disable_busmaster(dev)); } static int vga_pci_enable_io(device_t dev, device_t child, int space) { device_printf(dev, "child %s requested pci_enable_io\n", device_get_nameunit(child)); return (pci_enable_io(dev, space)); } static int vga_pci_disable_io(device_t dev, device_t child, int space) { device_printf(dev, "child %s requested pci_disable_io\n", device_get_nameunit(child)); return (pci_disable_io(dev, space)); } static int vga_pci_get_vpd_ident(device_t dev, device_t child, const char **identptr) { return (pci_get_vpd_ident(dev, identptr)); } static int vga_pci_get_vpd_readonly(device_t dev, device_t child, const char *kw, const char **vptr) { return (pci_get_vpd_readonly(dev, kw, vptr)); } static int vga_pci_set_powerstate(device_t dev, device_t child, int state) { device_printf(dev, "child %s requested pci_set_powerstate\n", device_get_nameunit(child)); return (pci_set_powerstate(dev, state)); } static int vga_pci_get_powerstate(device_t dev, device_t child) { device_printf(dev, "child %s requested pci_get_powerstate\n", device_get_nameunit(child)); return (pci_get_powerstate(dev)); } static int vga_pci_assign_interrupt(device_t dev, device_t child) { device_printf(dev, "child %s requested pci_assign_interrupt\n", device_get_nameunit(child)); return (PCI_ASSIGN_INTERRUPT(device_get_parent(dev), dev)); } static int vga_pci_find_cap(device_t dev, device_t child, int capability, int *capreg) { return (pci_find_cap(dev, capability, capreg)); } static int vga_pci_find_next_cap(device_t dev, device_t child, int capability, int start, int *capreg) { return (pci_find_next_cap(dev, capability, start, capreg)); } static int vga_pci_find_extcap(device_t dev, device_t child, int capability, int *capreg) { return (pci_find_extcap(dev, capability, capreg)); } static int vga_pci_find_next_extcap(device_t dev, device_t child, int capability, int start, int *capreg) { return (pci_find_next_extcap(dev, capability, start, capreg)); } static int 
vga_pci_find_htcap(device_t dev, device_t child, int capability, int *capreg) { return (pci_find_htcap(dev, capability, capreg)); } static int vga_pci_find_next_htcap(device_t dev, device_t child, int capability, int start, int *capreg) { return (pci_find_next_htcap(dev, capability, start, capreg)); } static int vga_pci_alloc_msi(device_t dev, device_t child, int *count) { struct vga_pci_softc *sc; int error; sc = device_get_softc(dev); if (sc->vga_msi_child != NULL) return (EBUSY); error = pci_alloc_msi(dev, count); if (error == 0) sc->vga_msi_child = child; return (error); } static int vga_pci_alloc_msix(device_t dev, device_t child, int *count) { struct vga_pci_softc *sc; int error; sc = device_get_softc(dev); if (sc->vga_msi_child != NULL) return (EBUSY); error = pci_alloc_msix(dev, count); if (error == 0) sc->vga_msi_child = child; return (error); } static int vga_pci_remap_msix(device_t dev, device_t child, int count, const u_int *vectors) { struct vga_pci_softc *sc; sc = device_get_softc(dev); if (sc->vga_msi_child != child) return (ENXIO); return (pci_remap_msix(dev, count, vectors)); } static int vga_pci_release_msi(device_t dev, device_t child) { struct vga_pci_softc *sc; int error; sc = device_get_softc(dev); if (sc->vga_msi_child != child) return (ENXIO); error = pci_release_msi(dev); if (error == 0) sc->vga_msi_child = NULL; return (error); } static int vga_pci_msi_count(device_t dev, device_t child) { return (pci_msi_count(dev)); } static int vga_pci_msix_count(device_t dev, device_t child) { return (pci_msix_count(dev)); } static bus_dma_tag_t vga_pci_get_dma_tag(device_t bus, device_t child) { return (bus_get_dma_tag(bus)); } static device_method_t vga_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vga_pci_probe), DEVMETHOD(device_attach, vga_pci_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, vga_pci_suspend), DEVMETHOD(device_detach, vga_pci_detach), DEVMETHOD(device_resume, vga_pci_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, vga_pci_read_ivar), DEVMETHOD(bus_write_ivar, vga_pci_write_ivar), DEVMETHOD(bus_setup_intr, vga_pci_setup_intr), DEVMETHOD(bus_teardown_intr, vga_pci_teardown_intr), DEVMETHOD(bus_alloc_resource, vga_pci_alloc_resource), DEVMETHOD(bus_release_resource, vga_pci_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_get_dma_tag, vga_pci_get_dma_tag), /* PCI interface */ DEVMETHOD(pci_read_config, vga_pci_read_config), DEVMETHOD(pci_write_config, vga_pci_write_config), DEVMETHOD(pci_enable_busmaster, vga_pci_enable_busmaster), DEVMETHOD(pci_disable_busmaster, vga_pci_disable_busmaster), DEVMETHOD(pci_enable_io, vga_pci_enable_io), DEVMETHOD(pci_disable_io, vga_pci_disable_io), DEVMETHOD(pci_get_vpd_ident, vga_pci_get_vpd_ident), DEVMETHOD(pci_get_vpd_readonly, vga_pci_get_vpd_readonly), DEVMETHOD(pci_get_powerstate, vga_pci_get_powerstate), DEVMETHOD(pci_set_powerstate, vga_pci_set_powerstate), DEVMETHOD(pci_assign_interrupt, vga_pci_assign_interrupt), DEVMETHOD(pci_find_cap, vga_pci_find_cap), DEVMETHOD(pci_find_next_cap, vga_pci_find_next_cap), DEVMETHOD(pci_find_extcap, vga_pci_find_extcap), DEVMETHOD(pci_find_next_extcap, vga_pci_find_next_extcap), DEVMETHOD(pci_find_htcap, vga_pci_find_htcap), DEVMETHOD(pci_find_next_htcap, vga_pci_find_next_htcap), DEVMETHOD(pci_alloc_msi, vga_pci_alloc_msi), DEVMETHOD(pci_alloc_msix, vga_pci_alloc_msix), DEVMETHOD(pci_remap_msix, 
vga_pci_remap_msix), DEVMETHOD(pci_release_msi, vga_pci_release_msi), DEVMETHOD(pci_msi_count, vga_pci_msi_count), DEVMETHOD(pci_msix_count, vga_pci_msix_count), { 0, 0 } }; static driver_t vga_pci_driver = { "vgapci", vga_pci_methods, sizeof(struct vga_pci_softc), }; static devclass_t vga_devclass; DRIVER_MODULE(vgapci, pci, vga_pci_driver, vga_devclass, 0, 0); MODULE_DEPEND(vgapci, x86bios, 1, 1, 1); Index: head/sys/dev/rc/rc.c =================================================================== --- head/sys/dev/rc/rc.c (revision 344854) +++ head/sys/dev/rc/rc.c (revision 344855) @@ -1,1314 +1,1314 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 1995 by Pavel Antonov, Moscow, Russia. * Copyright (C) 1995 by Andrey A. Chernov, Moscow, Russia. - * Copyright (C) 2002 by John Baldwin * All rights reserved. + * Copyright (C) 2002 by John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * SDL Communications Riscom/8 (based on Cirrus Logic CL-CD180) driver * */ /*#define RCDEBUG*/ #include "opt_tty.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IOBASE_ADDRS 14 #define rcin(sc, port) RC_IN(sc, port) #define rcout(sc, port, v) RC_OUT(sc, port, v) #define WAITFORCCR(sc, chan) rc_wait0((sc), (chan), __LINE__) #define CCRCMD(sc, chan, cmd) do { \ WAITFORCCR((sc), (chan)); \ rcout((sc), CD180_CCR, (cmd)); \ } while (0) #define RC_IBUFSIZE 256 #define RB_I_HIGH_WATER (TTYHOG - 2 * RC_IBUFSIZE) #define RC_OBUFSIZE 512 #define RC_IHIGHWATER (3 * RC_IBUFSIZE / 4) #define INPUT_FLAGS_SHIFT (2 * RC_IBUFSIZE) #define LOTS_OF_EVENTS 64 #define RC_FAKEID 0x10 /* Per-channel structure */ struct rc_chans { struct rc_softc *rc_rcb; /* back ptr */ u_short rc_flags; /* Misc. flags */ int rc_chan; /* Channel # */ u_char rc_ier; /* intr. enable reg */ u_char rc_msvr; /* modem sig. 
status */ u_char rc_cor2; /* options reg */ u_char rc_pendcmd; /* special cmd pending */ u_int rc_dcdwaits; /* how many waits DCD in open */ struct tty *rc_tp; /* tty struct */ u_char *rc_iptr; /* Chars input buffer */ u_char *rc_hiwat; /* hi-water mark */ u_char *rc_bufend; /* end of buffer */ u_char *rc_optr; /* ptr in output buf */ u_char *rc_obufend; /* end of output buf */ u_char rc_ibuf[4 * RC_IBUFSIZE]; /* input buffer */ u_char rc_obuf[RC_OBUFSIZE]; /* output buffer */ struct callout rc_dtrcallout; }; /* Per-board structure */ struct rc_softc { device_t sc_dev; struct resource *sc_irq; struct resource *sc_port[IOBASE_ADDRS]; int sc_irqrid; void *sc_hwicookie; bus_space_tag_t sc_bt; bus_space_handle_t sc_bh; u_int sc_unit; /* unit # */ u_char sc_dtr; /* DTR status */ int sc_scheduled_event; void *sc_swicookie; struct rc_chans sc_channels[CD180_NCHAN]; /* channels */ }; /* Static prototypes */ static t_close_t rc_close; static void rc_break(struct tty *, int); static void rc_release_resources(device_t dev); static void rc_intr(void *); static void rc_hwreset(struct rc_softc *, unsigned int); static int rc_test(struct rc_softc *); static void rc_discard_output(struct rc_chans *); static int rc_modem(struct tty *, int, int); static void rc_start(struct tty *); static void rc_stop(struct tty *, int rw); static int rc_param(struct tty *, struct termios *); static void rc_pollcard(void *); static void rc_reinit(struct rc_softc *); #ifdef RCDEBUG static void printrcflags(); #endif static void rc_wait0(struct rc_softc *sc, int chan, int line); static devclass_t rc_devclass; /* Flags */ #define RC_DTR_OFF 0x0001 /* DTR wait, for close/open */ #define RC_ACTOUT 0x0002 /* Dial-out port active */ #define RC_RTSFLOW 0x0004 /* RTS flow ctl enabled */ #define RC_CTSFLOW 0x0008 /* CTS flow ctl enabled */ #define RC_DORXFER 0x0010 /* RXFER event planned */ #define RC_DOXXFER 0x0020 /* XXFER event planned */ #define RC_MODCHG 0x0040 /* Modem status changed */ #define RC_OSUSP 0x0080 /* Output suspended */ #define RC_OSBUSY 0x0100 /* start() routine in progress */ #define RC_WAS_BUFOVFL 0x0200 /* low-level buffer ovferflow */ #define RC_WAS_SILOVFL 0x0400 /* silo buffer overflow */ #define RC_SEND_RDY 0x0800 /* ready to send */ /* Table for translation of RCSR status bits to internal form */ static int rc_rcsrt[16] = { 0, TTY_OE, TTY_FE, TTY_FE|TTY_OE, TTY_PE, TTY_PE|TTY_OE, TTY_PE|TTY_FE, TTY_PE|TTY_FE|TTY_OE, TTY_BI, TTY_BI|TTY_OE, TTY_BI|TTY_FE, TTY_BI|TTY_FE|TTY_OE, TTY_BI|TTY_PE, TTY_BI|TTY_PE|TTY_OE, TTY_BI|TTY_PE|TTY_FE, TTY_BI|TTY_PE|TTY_FE|TTY_OE }; static int rc_ports[] = { 0x220, 0x240, 0x250, 0x260, 0x2a0, 0x2b0, 0x300, 0x320 }; static int iobase_addrs[IOBASE_ADDRS] = { 0, 0x400, 0x800, 0xc00, 0x1400, 0x1800, 0x1c00, 0x2000, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x8000 }; /**********************************************/ static int rc_probe(device_t dev) { u_int port; int i, found; /* * We don't know of any PnP ID's for these cards. */ if (isa_get_logicalid(dev) != 0) return (ENXIO); /* * We have to have an IO port hint that is valid. */ port = isa_get_port(dev); if (port == -1) return (ENXIO); found = 0; for (i = 0; i < nitems(rc_ports); i++) if (rc_ports[i] == port) { found = 1; break; } if (!found) return (ENXIO); /* * We have to have an IRQ hint. 
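[Since rc_probe() above rejects anything with a PnP ID and requires both a port hint matching rc_ports[] and an IRQ hint, attachment is driven entirely by device hints. Illustrative entries; the IRQ value is arbitrary, but the port must be one of the addresses in the table.]

/*
 * /boot/device.hints:
 *	hint.rc.0.at="isa"
 *	hint.rc.0.port="0x220"
 *	hint.rc.0.irq="12"
 */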
*/ if (isa_get_irq(dev) == -1) return (ENXIO); device_set_desc(dev, "SDL Riscom/8"); return (0); } static int rc_attach(device_t dev) { struct rc_chans *rc; struct tty *tp; struct rc_softc *sc; u_int port; int base, chan, error, i, x; sc = device_get_softc(dev); sc->sc_dev = dev; /* * We need to have IO ports. Lots of them. We need * the following ranges relative to the base port: * 0x0 - 0x10 * 0x400 - 0x410 * 0x800 - 0x810 * 0xc00 - 0xc10 * 0x1400 - 0x1410 * 0x1800 - 0x1810 * 0x1c00 - 0x1c10 * 0x2000 - 0x2010 * 0x3000 - 0x3010 * 0x3400 - 0x3410 * 0x3800 - 0x3810 * 0x3c00 - 0x3c10 * 0x4000 - 0x4010 * 0x8000 - 0x8010 */ port = isa_get_port(dev); for (i = 0; i < IOBASE_ADDRS; i++) if (bus_set_resource(dev, SYS_RES_IOPORT, i, port + iobase_addrs[i], 0x10) != 0) return (ENXIO); error = ENOMEM; for (i = 0; i < IOBASE_ADDRS; i++) { x = i; sc->sc_port[i] = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &x, 0x10, RF_ACTIVE); if (x != i) { device_printf(dev, "ioport %d was rid %d\n", i, x); goto fail; } if (sc->sc_port[i] == NULL) { device_printf(dev, "failed to alloc ioports %x-%x\n", port + iobase_addrs[i], port + iobase_addrs[i] + 0x10); goto fail; } } sc->sc_bt = rman_get_bustag(sc->sc_port[0]); sc->sc_bh = rman_get_bushandle(sc->sc_port[0]); sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irqrid, RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "failed to alloc IRQ\n"); goto fail; } /* * Now do some actual tests to make sure it works. */ error = ENXIO; rcout(sc, CD180_PPRL, 0x22); /* Random values to Prescale reg. */ rcout(sc, CD180_PPRH, 0x11); if (rcin(sc, CD180_PPRL) != 0x22 || rcin(sc, CD180_PPRH) != 0x11) goto fail; if (rc_test(sc)) goto fail; /* * Ok, start actually hooking things up. */ sc->sc_unit = device_get_unit(dev); /*sc->sc_chipid = 0x10 + device_get_unit(dev);*/ device_printf(dev, "%d chans, firmware rev. 
%c\n", CD180_NCHAN, (rcin(sc, CD180_GFRCR) & 0xF) + 'A'); rc = sc->sc_channels; base = CD180_NCHAN * sc->sc_unit; for (chan = 0; chan < CD180_NCHAN; chan++, rc++) { rc->rc_rcb = sc; rc->rc_chan = chan; rc->rc_iptr = rc->rc_ibuf; rc->rc_bufend = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_hiwat = &rc->rc_ibuf[RC_IHIGHWATER]; rc->rc_optr = rc->rc_obufend = rc->rc_obuf; callout_init(&rc->rc_dtrcallout, 0); tp = rc->rc_tp = ttyalloc(); tp->t_sc = rc; tp->t_oproc = rc_start; tp->t_param = rc_param; tp->t_modem = rc_modem; tp->t_break = rc_break; tp->t_close = rc_close; tp->t_stop = rc_stop; ttycreate(tp, TS_CALLOUT, "m%d", chan + base); } error = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_TTY, NULL, rc_intr, sc, &sc->sc_hwicookie); if (error) { device_printf(dev, "failed to register interrupt handler\n"); goto fail; } swi_add(&tty_intr_event, "rc", rc_pollcard, sc, SWI_TTY, 0, &sc->sc_swicookie); return (0); fail: rc_release_resources(dev); return (error); } static int rc_detach(device_t dev) { struct rc_softc *sc; struct rc_chans *rc; int error, i; sc = device_get_softc(dev); rc = sc->sc_channels; for (i = 0; i < CD180_NCHAN; i++, rc++) ttyfree(rc->rc_tp); error = bus_teardown_intr(dev, sc->sc_irq, sc->sc_hwicookie); if (error) device_printf(dev, "failed to deregister interrupt handler\n"); swi_remove(sc->sc_swicookie); rc_release_resources(dev); return (0); } static void rc_release_resources(device_t dev) { struct rc_softc *sc; int i; sc = device_get_softc(dev); if (sc->sc_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irqrid, sc->sc_irq); sc->sc_irq = NULL; } for (i = 0; i < IOBASE_ADDRS; i++) { if (sc->sc_port[i] == NULL) break; bus_release_resource(dev, SYS_RES_IOPORT, i, sc->sc_port[i]); sc->sc_port[i] = NULL; } } /* RC interrupt handling */ static void rc_intr(void *arg) { struct rc_softc *sc; struct rc_chans *rc; int resid, chan; u_char val, iack, bsr, ucnt, *optr; int good_data, t_state; sc = (struct rc_softc *)arg; bsr = ~(rcin(sc, RC_BSR)); if (!(bsr & (RC_BSR_TOUT|RC_BSR_RXINT|RC_BSR_TXINT|RC_BSR_MOINT))) { device_printf(sc->sc_dev, "extra interrupt\n"); rcout(sc, CD180_EOIR, 0); return; } while (bsr & (RC_BSR_TOUT|RC_BSR_RXINT|RC_BSR_TXINT|RC_BSR_MOINT)) { #ifdef RCDEBUG_DETAILED device_printf(sc->sc_dev, "intr (%p) %s%s%s%s\n", arg, bsr, (bsr & RC_BSR_TOUT)?"TOUT ":"", (bsr & RC_BSR_RXINT)?"RXINT ":"", (bsr & RC_BSR_TXINT)?"TXINT ":"", (bsr & RC_BSR_MOINT)?"MOINT":""); #endif if (bsr & RC_BSR_TOUT) { device_printf(sc->sc_dev, "hardware failure, reset board\n"); rcout(sc, RC_CTOUT, 0); rc_reinit(sc); return; } if (bsr & RC_BSR_RXINT) { iack = rcin(sc, RC_PILR_RX); good_data = (iack == (GIVR_IT_RGDI | RC_FAKEID)); if (!good_data && iack != (GIVR_IT_REI | RC_FAKEID)) { device_printf(sc->sc_dev, "fake rxint: %02x\n", iack); goto more_intrs; } chan = ((rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH); rc = &sc->sc_channels[chan]; t_state = rc->rc_tp->t_state; /* Do RTS flow control stuff */ if ( (rc->rc_flags & RC_RTSFLOW) || !(t_state & TS_ISOPEN) ) { if ( ( !(t_state & TS_ISOPEN) || (t_state & TS_TBLOCK) ) && (rc->rc_msvr & MSVR_RTS) ) rcout(sc, CD180_MSVR, rc->rc_msvr &= ~MSVR_RTS); else if (!(rc->rc_msvr & MSVR_RTS)) rcout(sc, CD180_MSVR, rc->rc_msvr |= MSVR_RTS); } ucnt = rcin(sc, CD180_RDCR) & 0xF; resid = 0; if (t_state & TS_ISOPEN) { /* check for input buffer overflow */ if ((rc->rc_iptr + ucnt) >= rc->rc_bufend) { resid = ucnt; ucnt = rc->rc_bufend - rc->rc_iptr; resid -= ucnt; if (!(rc->rc_flags & RC_WAS_BUFOVFL)) { rc->rc_flags |= RC_WAS_BUFOVFL; sc->sc_scheduled_event++; } 
} optr = rc->rc_iptr; /* check foor good data */ if (good_data) { while (ucnt-- > 0) { val = rcin(sc, CD180_RDR); optr[0] = val; optr[INPUT_FLAGS_SHIFT] = 0; optr++; sc->sc_scheduled_event++; if (val != 0 && val == rc->rc_tp->t_hotchar) swi_sched(sc->sc_swicookie, 0); } } else { /* Store also status data */ while (ucnt-- > 0) { iack = rcin(sc, CD180_RCSR); if (iack & RCSR_Timeout) break; if ( (iack & RCSR_OE) && !(rc->rc_flags & RC_WAS_SILOVFL)) { rc->rc_flags |= RC_WAS_SILOVFL; sc->sc_scheduled_event++; } val = rcin(sc, CD180_RDR); /* Don't store PE if IGNPAR and BREAK if IGNBRK, this hack allows "raw" tty optimization works even if IGN* is set. */ if ( !(iack & (RCSR_PE|RCSR_FE|RCSR_Break)) || ((!(iack & (RCSR_PE|RCSR_FE)) || !(rc->rc_tp->t_iflag & IGNPAR)) && (!(iack & RCSR_Break) || !(rc->rc_tp->t_iflag & IGNBRK)))) { if ( (iack & (RCSR_PE|RCSR_FE)) && (t_state & TS_CAN_BYPASS_L_RINT) && ((iack & RCSR_FE) || ((iack & RCSR_PE) && (rc->rc_tp->t_iflag & INPCK)))) val = 0; else if (val != 0 && val == rc->rc_tp->t_hotchar) swi_sched(sc->sc_swicookie, 0); optr[0] = val; optr[INPUT_FLAGS_SHIFT] = iack; optr++; sc->sc_scheduled_event++; } } } rc->rc_iptr = optr; rc->rc_flags |= RC_DORXFER; } else resid = ucnt; /* Clear FIFO if necessary */ while (resid-- > 0) { if (!good_data) iack = rcin(sc, CD180_RCSR); else iack = 0; if (iack & RCSR_Timeout) break; (void) rcin(sc, CD180_RDR); } goto more_intrs; } if (bsr & RC_BSR_MOINT) { iack = rcin(sc, RC_PILR_MODEM); if (iack != (GIVR_IT_MSCI | RC_FAKEID)) { device_printf(sc->sc_dev, "fake moint: %02x\n", iack); goto more_intrs; } chan = ((rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH); rc = &sc->sc_channels[chan]; iack = rcin(sc, CD180_MCR); rc->rc_msvr = rcin(sc, CD180_MSVR); rcout(sc, CD180_MCR, 0); #ifdef RCDEBUG printrcflags(rc, "moint"); #endif if (rc->rc_flags & RC_CTSFLOW) { if (rc->rc_msvr & MSVR_CTS) rc->rc_flags |= RC_SEND_RDY; else rc->rc_flags &= ~RC_SEND_RDY; } else rc->rc_flags |= RC_SEND_RDY; if ((iack & MCR_CDchg) && !(rc->rc_flags & RC_MODCHG)) { sc->sc_scheduled_event += LOTS_OF_EVENTS; rc->rc_flags |= RC_MODCHG; swi_sched(sc->sc_swicookie, 0); } goto more_intrs; } if (bsr & RC_BSR_TXINT) { iack = rcin(sc, RC_PILR_TX); if (iack != (GIVR_IT_TDI | RC_FAKEID)) { device_printf(sc->sc_dev, "fake txint: %02x\n", iack); goto more_intrs; } chan = ((rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH); rc = &sc->sc_channels[chan]; if ( (rc->rc_flags & RC_OSUSP) || !(rc->rc_flags & RC_SEND_RDY) ) goto more_intrs; /* Handle breaks and other stuff */ if (rc->rc_pendcmd) { rcout(sc, CD180_COR2, rc->rc_cor2 |= COR2_ETC); rcout(sc, CD180_TDR, CD180_C_ESC); rcout(sc, CD180_TDR, rc->rc_pendcmd); rcout(sc, CD180_COR2, rc->rc_cor2 &= ~COR2_ETC); rc->rc_pendcmd = 0; goto more_intrs; } optr = rc->rc_optr; resid = rc->rc_obufend - optr; if (resid > CD180_NFIFO) resid = CD180_NFIFO; while (resid-- > 0) rcout(sc, CD180_TDR, *optr++); rc->rc_optr = optr; /* output completed? 
*/ if (optr >= rc->rc_obufend) { rcout(sc, CD180_IER, rc->rc_ier &= ~IER_TxRdy); #ifdef RCDEBUG device_printf(sc->sc_dev, "channel %d: output completed\n", rc->rc_chan); #endif if (!(rc->rc_flags & RC_DOXXFER)) { sc->sc_scheduled_event += LOTS_OF_EVENTS; rc->rc_flags |= RC_DOXXFER; swi_sched(sc->sc_swicookie, 0); } } } more_intrs: rcout(sc, CD180_EOIR, 0); /* end of interrupt */ rcout(sc, RC_CTOUT, 0); bsr = ~(rcin(sc, RC_BSR)); } } /* Feed characters to output buffer */ static void rc_start(struct tty *tp) { struct rc_softc *sc; struct rc_chans *rc; int s; rc = tp->t_sc; if (rc->rc_flags & RC_OSBUSY) return; sc = rc->rc_rcb; s = spltty(); rc->rc_flags |= RC_OSBUSY; critical_enter(); if (tp->t_state & TS_TTSTOP) rc->rc_flags |= RC_OSUSP; else rc->rc_flags &= ~RC_OSUSP; /* Do RTS flow control stuff */ if ( (rc->rc_flags & RC_RTSFLOW) && (tp->t_state & TS_TBLOCK) && (rc->rc_msvr & MSVR_RTS) ) { rcout(sc, CD180_CAR, rc->rc_chan); rcout(sc, CD180_MSVR, rc->rc_msvr &= ~MSVR_RTS); } else if (!(rc->rc_msvr & MSVR_RTS)) { rcout(sc, CD180_CAR, rc->rc_chan); rcout(sc, CD180_MSVR, rc->rc_msvr |= MSVR_RTS); } critical_exit(); if (tp->t_state & (TS_TIMEOUT|TS_TTSTOP)) goto out; #ifdef RCDEBUG printrcflags(rc, "rcstart"); #endif ttwwakeup(tp); #ifdef RCDEBUG printf("rcstart: outq = %d obuf = %d\n", tp->t_outq.c_cc, rc->rc_obufend - rc->rc_optr); #endif if (tp->t_state & TS_BUSY) goto out; /* output still in progress ... */ if (tp->t_outq.c_cc > 0) { u_int ocnt; tp->t_state |= TS_BUSY; ocnt = q_to_b(&tp->t_outq, rc->rc_obuf, sizeof rc->rc_obuf); critical_enter(); rc->rc_optr = rc->rc_obuf; rc->rc_obufend = rc->rc_optr + ocnt; critical_exit(); if (!(rc->rc_ier & IER_TxRdy)) { #ifdef RCDEBUG device_printf(sc->sc_dev, "channel %d: rcstart enable txint\n", rc->rc_chan); #endif rcout(sc, CD180_CAR, rc->rc_chan); rcout(sc, CD180_IER, rc->rc_ier |= IER_TxRdy); } } out: rc->rc_flags &= ~RC_OSBUSY; (void) splx(s); } /* Handle delayed events. 
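[rc_pollcard() below is the deferred half of the driver: rc_intr() drains the CD180 FIFOs into per-channel ring buffers at interrupt level, bumps sc_scheduled_event, and kicks a software interrupt, so the tty-layer work runs later at SWI priority. A minimal sketch of that register-then-kick pattern, mirroring the swi_add() call in rc_attach() above; names are illustrative and signatures follow this era's <sys/interrupt.h>.]

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/interrupt.h>

static struct intr_event *example_event;	/* created by swi_add() */
static void *example_cookie;

static void
soft_half(void *arg)
{
	/* Runs at SWI level; a safe place for per-character tty work. */
}

static void
setup_soft_half(void *sc)
{
	(void)swi_add(&example_event, "example", soft_half, sc, SWI_TTY, 0,
	    &example_cookie);
}

static void
hard_half(void)
{
	swi_sched(example_cookie, 0);	/* defer the expensive work */
}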
*/ void rc_pollcard(void *arg) { struct rc_softc *sc; struct rc_chans *rc; struct tty *tp; u_char *tptr, *eptr; int chan, icnt; sc = (struct rc_softc *)arg; if (sc->sc_scheduled_event == 0) return; do { rc = sc->sc_channels; for (chan = 0; chan < CD180_NCHAN; rc++, chan++) { tp = rc->rc_tp; #ifdef RCDEBUG if (rc->rc_flags & (RC_DORXFER|RC_DOXXFER|RC_MODCHG| RC_WAS_BUFOVFL|RC_WAS_SILOVFL)) printrcflags(rc, "rcevent"); #endif if (rc->rc_flags & RC_WAS_BUFOVFL) { critical_enter(); rc->rc_flags &= ~RC_WAS_BUFOVFL; sc->sc_scheduled_event--; critical_exit(); device_printf(sc->sc_dev, "channel %d: interrupt-level buffer overflow\n", chan); } if (rc->rc_flags & RC_WAS_SILOVFL) { critical_enter(); rc->rc_flags &= ~RC_WAS_SILOVFL; sc->sc_scheduled_event--; critical_exit(); device_printf(sc->sc_dev, "channel %d: silo overflow\n", chan); } if (rc->rc_flags & RC_MODCHG) { critical_enter(); rc->rc_flags &= ~RC_MODCHG; sc->sc_scheduled_event -= LOTS_OF_EVENTS; critical_exit(); ttyld_modem(tp, !!(rc->rc_msvr & MSVR_CD)); } if (rc->rc_flags & RC_DORXFER) { critical_enter(); rc->rc_flags &= ~RC_DORXFER; eptr = rc->rc_iptr; if (rc->rc_bufend == &rc->rc_ibuf[2 * RC_IBUFSIZE]) tptr = &rc->rc_ibuf[RC_IBUFSIZE]; else tptr = rc->rc_ibuf; icnt = eptr - tptr; if (icnt > 0) { if (rc->rc_bufend == &rc->rc_ibuf[2 * RC_IBUFSIZE]) { rc->rc_iptr = rc->rc_ibuf; rc->rc_bufend = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_hiwat = &rc->rc_ibuf[RC_IHIGHWATER]; } else { rc->rc_iptr = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_bufend = &rc->rc_ibuf[2 * RC_IBUFSIZE]; rc->rc_hiwat = &rc->rc_ibuf[RC_IBUFSIZE + RC_IHIGHWATER]; } if ( (rc->rc_flags & RC_RTSFLOW) && (tp->t_state & TS_ISOPEN) && !(tp->t_state & TS_TBLOCK) && !(rc->rc_msvr & MSVR_RTS) ) { rcout(sc, CD180_CAR, chan); rcout(sc, CD180_MSVR, rc->rc_msvr |= MSVR_RTS); } sc->sc_scheduled_event -= icnt; } critical_exit(); if (icnt <= 0 || !(tp->t_state & TS_ISOPEN)) goto done1; if ( (tp->t_state & TS_CAN_BYPASS_L_RINT) && !(tp->t_state & TS_LOCAL)) { if ((tp->t_rawq.c_cc + icnt) >= RB_I_HIGH_WATER && ((rc->rc_flags & RC_RTSFLOW) || (tp->t_iflag & IXOFF)) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); tk_nin += icnt; tk_rawcc += icnt; tp->t_rawcc += icnt; if (b_to_q(tptr, icnt, &tp->t_rawq)) device_printf(sc->sc_dev, "channel %d: tty-level buffer overflow\n", chan); ttwakeup(tp); if ((tp->t_state & TS_TTSTOP) && ((tp->t_iflag & IXANY) || (tp->t_cc[VSTART] == tp->t_cc[VSTOP]))) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; rc_start(tp); } } else { for (; tptr < eptr; tptr++) ttyld_rint(tp, (tptr[0] | rc_rcsrt[tptr[INPUT_FLAGS_SHIFT] & 0xF])); } done1: ; } if (rc->rc_flags & RC_DOXXFER) { critical_enter(); sc->sc_scheduled_event -= LOTS_OF_EVENTS; rc->rc_flags &= ~RC_DOXXFER; rc->rc_tp->t_state &= ~TS_BUSY; critical_exit(); ttyld_start(tp); } if (sc->sc_scheduled_event == 0) break; } } while (sc->sc_scheduled_event >= LOTS_OF_EVENTS); } static void rc_stop(struct tty *tp, int rw) { struct rc_softc *sc; struct rc_chans *rc; u_char *tptr, *eptr; rc = tp->t_sc; sc = rc->rc_rcb; #ifdef RCDEBUG device_printf(sc->sc_dev, "channel %d: rc_stop %s%s\n", rc->rc_chan, (rw & FWRITE)?"FWRITE ":"", (rw & FREAD)?"FREAD":""); #endif if (rw & FWRITE) rc_discard_output(rc); critical_enter(); if (rw & FREAD) { rc->rc_flags &= ~RC_DORXFER; eptr = rc->rc_iptr; if (rc->rc_bufend == &rc->rc_ibuf[2 * RC_IBUFSIZE]) { tptr = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_iptr = &rc->rc_ibuf[RC_IBUFSIZE]; } else { tptr = rc->rc_ibuf; rc->rc_iptr = rc->rc_ibuf; } sc->sc_scheduled_event -= eptr - tptr; } if (tp->t_state & TS_TTSTOP) 
rc->rc_flags |= RC_OSUSP; else rc->rc_flags &= ~RC_OSUSP; critical_exit(); } static void rc_close(struct tty *tp) { struct rc_chans *rc; struct rc_softc *sc; int s; rc = tp->t_sc; sc = rc->rc_rcb; s = spltty(); rcout(sc, CD180_CAR, rc->rc_chan); /* Disable rx/tx intrs */ rcout(sc, CD180_IER, rc->rc_ier = 0); if ( (tp->t_cflag & HUPCL) || (!(rc->rc_flags & RC_ACTOUT) && !(rc->rc_msvr & MSVR_CD) && !(tp->t_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN) ) { CCRCMD(sc, rc->rc_chan, CCR_ResetChan); WAITFORCCR(sc, rc->rc_chan); (void) rc_modem(tp, SER_RTS, 0); ttydtrwaitstart(tp); } rc->rc_flags &= ~RC_ACTOUT; wakeup( &rc->rc_rcb); /* wake bi */ wakeup(TSA_CARR_ON(tp)); (void) splx(s); } /* Reset the bastard */ static void rc_hwreset(struct rc_softc *sc, u_int chipid) { CCRCMD(sc, -1, CCR_HWRESET); /* Hardware reset */ DELAY(20000); WAITFORCCR(sc, -1); rcout(sc, RC_CTOUT, 0); /* Clear timeout */ rcout(sc, CD180_GIVR, chipid); rcout(sc, CD180_GICR, 0); /* Set Prescaler Registers (1 msec) */ rcout(sc, CD180_PPRL, ((RC_OSCFREQ + 999) / 1000) & 0xFF); rcout(sc, CD180_PPRH, ((RC_OSCFREQ + 999) / 1000) >> 8); /* Initialize Priority Interrupt Level Registers */ rcout(sc, CD180_PILR1, RC_PILR_MODEM); rcout(sc, CD180_PILR2, RC_PILR_TX); rcout(sc, CD180_PILR3, RC_PILR_RX); /* Reset DTR */ rcout(sc, RC_DTREG, ~0); } /* Set channel parameters */ static int rc_param(struct tty *tp, struct termios *ts) { struct rc_softc *sc; struct rc_chans *rc; int idivs, odivs, s, val, cflag, iflag, lflag, inpflow; if ( ts->c_ospeed < 0 || ts->c_ospeed > 76800 || ts->c_ispeed < 0 || ts->c_ispeed > 76800 ) return (EINVAL); if (ts->c_ispeed == 0) ts->c_ispeed = ts->c_ospeed; odivs = RC_BRD(ts->c_ospeed); idivs = RC_BRD(ts->c_ispeed); rc = tp->t_sc; sc = rc->rc_rcb; s = spltty(); /* Select channel */ rcout(sc, CD180_CAR, rc->rc_chan); /* If speed == 0, hangup line */ if (ts->c_ospeed == 0) { CCRCMD(sc, rc->rc_chan, CCR_ResetChan); WAITFORCCR(sc, rc->rc_chan); (void) rc_modem(tp, 0, SER_DTR); } tp->t_state &= ~TS_CAN_BYPASS_L_RINT; cflag = ts->c_cflag; iflag = ts->c_iflag; lflag = ts->c_lflag; if (idivs > 0) { rcout(sc, CD180_RBPRL, idivs & 0xFF); rcout(sc, CD180_RBPRH, idivs >> 8); } if (odivs > 0) { rcout(sc, CD180_TBPRL, odivs & 0xFF); rcout(sc, CD180_TBPRH, odivs >> 8); } /* set timeout value */ if (ts->c_ispeed > 0) { int itm = ts->c_ispeed > 2400 ? 5 : 10000 / ts->c_ispeed + 1; if ( !(lflag & ICANON) && ts->c_cc[VMIN] != 0 && ts->c_cc[VTIME] != 0 && ts->c_cc[VTIME] * 10 > itm) itm = ts->c_cc[VTIME] * 10; rcout(sc, CD180_RTPR, itm <= 255 ? itm : 255); } switch (cflag & CSIZE) { case CS5: val = COR1_5BITS; break; case CS6: val = COR1_6BITS; break; case CS7: val = COR1_7BITS; break; default: case CS8: val = COR1_8BITS; break; } if (cflag & PARENB) { val |= COR1_NORMPAR; if (cflag & PARODD) val |= COR1_ODDP; if (!(cflag & INPCK)) val |= COR1_Ignore; } else val |= COR1_Ignore; if (cflag & CSTOPB) val |= COR1_2SB; rcout(sc, CD180_COR1, val); /* Set FIFO threshold */ val = ts->c_ospeed <= 4800 ? 
1 : CD180_NFIFO / 2; inpflow = 0; if ( (iflag & IXOFF) && ( ts->c_cc[VSTOP] != _POSIX_VDISABLE && ( ts->c_cc[VSTART] != _POSIX_VDISABLE || (iflag & IXANY) ) ) ) { inpflow = 1; val |= COR3_SCDE|COR3_FCT; } rcout(sc, CD180_COR3, val); /* Initialize on-chip automatic flow control */ val = 0; rc->rc_flags &= ~(RC_CTSFLOW|RC_SEND_RDY); if (cflag & CCTS_OFLOW) { rc->rc_flags |= RC_CTSFLOW; val |= COR2_CtsAE; } else rc->rc_flags |= RC_SEND_RDY; if (tp->t_state & TS_TTSTOP) rc->rc_flags |= RC_OSUSP; else rc->rc_flags &= ~RC_OSUSP; if (cflag & CRTS_IFLOW) rc->rc_flags |= RC_RTSFLOW; else rc->rc_flags &= ~RC_RTSFLOW; if (inpflow) { if (ts->c_cc[VSTART] != _POSIX_VDISABLE) rcout(sc, CD180_SCHR1, ts->c_cc[VSTART]); rcout(sc, CD180_SCHR2, ts->c_cc[VSTOP]); val |= COR2_TxIBE; if (iflag & IXANY) val |= COR2_IXM; } rcout(sc, CD180_COR2, rc->rc_cor2 = val); CCRCMD(sc, rc->rc_chan, CCR_CORCHG1 | CCR_CORCHG2 | CCR_CORCHG3); ttyldoptim(tp); /* modem ctl */ val = cflag & CLOCAL ? 0 : MCOR1_CDzd; if (cflag & CCTS_OFLOW) val |= MCOR1_CTSzd; rcout(sc, CD180_MCOR1, val); val = cflag & CLOCAL ? 0 : MCOR2_CDod; if (cflag & CCTS_OFLOW) val |= MCOR2_CTSod; rcout(sc, CD180_MCOR2, val); /* enable i/o and interrupts */ CCRCMD(sc, rc->rc_chan, CCR_XMTREN | ((cflag & CREAD) ? CCR_RCVREN : CCR_RCVRDIS)); WAITFORCCR(sc, rc->rc_chan); rc->rc_ier = cflag & CLOCAL ? 0 : IER_CD; if (cflag & CCTS_OFLOW) rc->rc_ier |= IER_CTS; if (cflag & CREAD) rc->rc_ier |= IER_RxData; if (tp->t_state & TS_BUSY) rc->rc_ier |= IER_TxRdy; if (ts->c_ospeed != 0) rc_modem(tp, SER_DTR, 0); if ((cflag & CCTS_OFLOW) && (rc->rc_msvr & MSVR_CTS)) rc->rc_flags |= RC_SEND_RDY; rcout(sc, CD180_IER, rc->rc_ier); (void) splx(s); return 0; } /* Re-initialize board after bogus interrupts */ static void rc_reinit(struct rc_softc *sc) { struct rc_chans *rc; int i; rc_hwreset(sc, RC_FAKEID); rc = sc->sc_channels; for (i = 0; i < CD180_NCHAN; i++, rc++) (void) rc_param(rc->rc_tp, &rc->rc_tp->t_termios); } /* Modem control routines */ static int rc_modem(struct tty *tp, int biton, int bitoff) { struct rc_chans *rc; struct rc_softc *sc; u_char *dtr; u_char msvr; rc = tp->t_sc; sc = rc->rc_rcb; dtr = &sc->sc_dtr; rcout(sc, CD180_CAR, rc->rc_chan); if (biton == 0 && bitoff == 0) { msvr = rc->rc_msvr = rcin(sc, CD180_MSVR); if (msvr & MSVR_RTS) biton |= SER_RTS; if (msvr & MSVR_CTS) biton |= SER_CTS; if (msvr & MSVR_DSR) biton |= SER_DSR; if (msvr & MSVR_DTR) biton |= SER_DTR; if (msvr & MSVR_CD) biton |= SER_DCD; if (~rcin(sc, RC_RIREG) & (1 << rc->rc_chan)) biton |= SER_RI; return biton; } if (biton & SER_DTR) rcout(sc, RC_DTREG, ~(*dtr |= 1 << rc->rc_chan)); if (bitoff & SER_DTR) rcout(sc, RC_DTREG, ~(*dtr &= ~(1 << rc->rc_chan))); msvr = rcin(sc, CD180_MSVR); if (biton & SER_DTR) msvr |= MSVR_DTR; if (bitoff & SER_DTR) msvr &= ~MSVR_DTR; if (biton & SER_RTS) msvr |= MSVR_RTS; if (bitoff & SER_RTS) msvr &= ~MSVR_RTS; rcout(sc, CD180_MSVR, msvr); return 0; } static void rc_break(struct tty *tp, int brk) { struct rc_chans *rc; rc = tp->t_sc; if (brk) rc->rc_pendcmd = CD180_C_SBRK; else rc->rc_pendcmd = CD180_C_EBRK; } #define ERR(s) do { \ device_printf(sc->sc_dev, "%s", ""); \ printf s ; \ printf("\n"); \ (void) splx(old_level); \ return 1; \ } while (0) /* Test the board. 
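[A note on rc_test() below: it runs each channel in CD180 local-loopback mode (COR2_LLM), transmits the ctest[] string, and compares what comes back. Decoded from octal, the pattern is 0xff 0x55 0xaa 0x25 0xa4 0x00 0xff plus the terminating NUL, filling the 8-byte FIFO with all-ones and alternating-bit values that exercise every data line. A sketch of the per-channel comparison the test amounts to; the helper is hypothetical, memcmp() is the kernel's libkern version.]

#include <sys/param.h>
#include <sys/systm.h>

static const u_char ctest_pattern[] =
    { 0xff, 0x55, 0xaa, 0x25, 0xa4, 0x00, 0xff, 0x00 };	/* 8 = CD180_NFIFO */

static int
loopback_ok(const u_char *rxbuf)
{
	return (memcmp(rxbuf, ctest_pattern, sizeof(ctest_pattern)) == 0);
}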
*/ int rc_test(struct rc_softc *sc) { int chan = 0; int i = 0, rcnt, old_level; unsigned int iack, chipid; unsigned short divs; static u_char ctest[] = "\377\125\252\045\244\0\377"; #define CTLEN 8 struct rtest { u_char txbuf[CD180_NFIFO]; /* TX buffer */ u_char rxbuf[CD180_NFIFO]; /* RX buffer */ int rxptr; /* RX pointer */ int txptr; /* TX pointer */ } tchans[CD180_NCHAN]; old_level = spltty(); chipid = RC_FAKEID; /* First, reset board to initial state */ rc_hwreset(sc, chipid); divs = RC_BRD(19200); /* Initialize channels */ for (chan = 0; chan < CD180_NCHAN; chan++) { /* Select and reset channel */ rcout(sc, CD180_CAR, chan); CCRCMD(sc, chan, CCR_ResetChan); WAITFORCCR(sc, chan); /* Set speed */ rcout(sc, CD180_RBPRL, divs & 0xFF); rcout(sc, CD180_RBPRH, divs >> 8); rcout(sc, CD180_TBPRL, divs & 0xFF); rcout(sc, CD180_TBPRH, divs >> 8); /* set timeout value */ rcout(sc, CD180_RTPR, 0); /* Establish local loopback */ rcout(sc, CD180_COR1, COR1_NOPAR | COR1_8BITS | COR1_1SB); rcout(sc, CD180_COR2, COR2_LLM); rcout(sc, CD180_COR3, CD180_NFIFO); CCRCMD(sc, chan, CCR_CORCHG1 | CCR_CORCHG2 | CCR_CORCHG3); CCRCMD(sc, chan, CCR_RCVREN | CCR_XMTREN); WAITFORCCR(sc, chan); rcout(sc, CD180_MSVR, MSVR_RTS); /* Fill TXBUF with test data */ for (i = 0; i < CD180_NFIFO; i++) { tchans[chan].txbuf[i] = ctest[i]; tchans[chan].rxbuf[i] = 0; } tchans[chan].txptr = tchans[chan].rxptr = 0; /* Now, start transmit */ rcout(sc, CD180_IER, IER_TxMpty|IER_RxData); } /* Pseudo-interrupt poll stuff */ for (rcnt = 10000; rcnt-- > 0; rcnt--) { i = ~(rcin(sc, RC_BSR)); if (i & RC_BSR_TOUT) ERR(("BSR timeout bit set\n")); else if (i & RC_BSR_TXINT) { iack = rcin(sc, RC_PILR_TX); if (iack != (GIVR_IT_TDI | chipid)) ERR(("Bad TX intr ack (%02x != %02x)\n", iack, GIVR_IT_TDI | chipid)); chan = (rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH; /* If no more data to transmit, disable TX intr */ if (tchans[chan].txptr >= CD180_NFIFO) { iack = rcin(sc, CD180_IER); rcout(sc, CD180_IER, iack & ~IER_TxMpty); } else { for (iack = tchans[chan].txptr; iack < CD180_NFIFO; iack++) rcout(sc, CD180_TDR, tchans[chan].txbuf[iack]); tchans[chan].txptr = iack; } rcout(sc, CD180_EOIR, 0); } else if (i & RC_BSR_RXINT) { u_char ucnt; iack = rcin(sc, RC_PILR_RX); if (iack != (GIVR_IT_RGDI | chipid) && iack != (GIVR_IT_REI | chipid)) ERR(("Bad RX intr ack (%02x != %02x)\n", iack, GIVR_IT_RGDI | chipid)); chan = (rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH; ucnt = rcin(sc, CD180_RDCR) & 0xF; while (ucnt-- > 0) { iack = rcin(sc, CD180_RCSR); if (iack & RCSR_Timeout) break; if (iack & 0xF) ERR(("Bad char chan %d (RCSR = %02X)\n", chan, iack)); if (tchans[chan].rxptr > CD180_NFIFO) ERR(("Got extra chars chan %d\n", chan)); tchans[chan].rxbuf[tchans[chan].rxptr++] = rcin(sc, CD180_RDR); } rcout(sc, CD180_EOIR, 0); } rcout(sc, RC_CTOUT, 0); for (iack = chan = 0; chan < CD180_NCHAN; chan++) if (tchans[chan].rxptr >= CD180_NFIFO) iack++; if (iack == CD180_NCHAN) break; } for (chan = 0; chan < CD180_NCHAN; chan++) { /* Select and reset channel */ rcout(sc, CD180_CAR, chan); CCRCMD(sc, chan, CCR_ResetChan); } if (!rcnt) ERR(("looses characters during local loopback\n")); /* Now, check data */ for (chan = 0; chan < CD180_NCHAN; chan++) for (i = 0; i < CD180_NFIFO; i++) if (ctest[i] != tchans[chan].rxbuf[i]) ERR(("data mismatch chan %d ptr %d (%d != %d)\n", chan, i, ctest[i], tchans[chan].rxbuf[i])); (void) splx(old_level); return 0; } #ifdef RCDEBUG static void printrcflags(struct rc_chans *rc, char *comment) { struct rc_softc *sc; u_short f = 
rc->rc_flags; sc = rc->rc_rcb; printf("rc%d/%d: %s flags: %s%s%s%s%s%s%s%s%s%s%s%s\n", rc->rc_rcb->rcb_unit, rc->rc_chan, comment, (f & RC_DTR_OFF)?"DTR_OFF " :"", (f & RC_ACTOUT) ?"ACTOUT " :"", (f & RC_RTSFLOW)?"RTSFLOW " :"", (f & RC_CTSFLOW)?"CTSFLOW " :"", (f & RC_DORXFER)?"DORXFER " :"", (f & RC_DOXXFER)?"DOXXFER " :"", (f & RC_MODCHG) ?"MODCHG " :"", (f & RC_OSUSP) ?"OSUSP " :"", (f & RC_OSBUSY) ?"OSBUSY " :"", (f & RC_WAS_BUFOVFL) ?"BUFOVFL " :"", (f & RC_WAS_SILOVFL) ?"SILOVFL " :"", (f & RC_SEND_RDY) ?"SEND_RDY":""); rcout(sc, CD180_CAR, rc->rc_chan); printf("rc%d/%d: msvr %02x ier %02x ccsr %02x\n", rc->rc_rcb->rcb_unit, rc->rc_chan, rcin(sc, CD180_MSVR), rcin(sc, CD180_IER), rcin(sc, CD180_CCSR)); } #endif /* RCDEBUG */ static void rc_discard_output(struct rc_chans *rc) { critical_enter(); if (rc->rc_flags & RC_DOXXFER) { rc->rc_rcb->sc_scheduled_event -= LOTS_OF_EVENTS; rc->rc_flags &= ~RC_DOXXFER; } rc->rc_optr = rc->rc_obufend; rc->rc_tp->t_state &= ~TS_BUSY; critical_exit(); ttwwakeup(rc->rc_tp); } static void rc_wait0(struct rc_softc *sc, int chan, int line) { int rcnt; for (rcnt = 50; rcnt && rcin(sc, CD180_CCR); rcnt--) DELAY(30); if (rcnt == 0) device_printf(sc->sc_dev, "channel %d command timeout, rc.c line: %d\n", chan, line); } static device_method_t rc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rc_probe), DEVMETHOD(device_attach, rc_attach), DEVMETHOD(device_detach, rc_detach), { 0, 0 } }; static driver_t rc_driver = { "rc", rc_methods, sizeof(struct rc_softc), }; DRIVER_MODULE(rc, isa, rc_driver, rc_devclass, 0, 0); Index: head/sys/dev/rc/rcreg.h =================================================================== --- head/sys/dev/rc/rcreg.h (revision 344854) +++ head/sys/dev/rc/rcreg.h (revision 344855) @@ -1,72 +1,72 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 1995 by Pavel Antonov, Moscow, Russia. * Copyright (C) 1995 by Andrey A. Chernov, Moscow, Russia. - * Copyright (C) 2002 by John Baldwin * All rights reserved. + * Copyright (C) 2002 by John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Cirrus Logic CD180 -based RISCom/8 board definitions */ /* Oscillator frequency - 19660.08Mhz / 2 */ #define RC_OSCFREQ 9830400 #define RC_BRD(s) ((s) == 0 ? 
0 : \ (((RC_OSCFREQ + (s) / 2) / (s)) + CD180_CTICKS/2) / CD180_CTICKS) /* Riscom/8 board ISA I/O mapping */ #define RC_IOMAP(r) ((((r) & 07) << 1) | (((r) & ~07) << 7)) /* I/O commands */ #define RC_OUT(sc, addr, value) \ bus_space_write_1((sc)->sc_bt, (sc)->sc_bh, RC_IOMAP(addr), (value)) #define RC_IN(sc, addr) \ bus_space_read_1((sc)->sc_bt, (sc)->sc_bh, RC_IOMAP(addr)) /* Riscom on-board registers (mapping assumed) */ #define RC_RIREG 0x100 /* Ring Indicator Register (read-only) */ #define RC_DTREG 0x100 /* DTR Register (write-only) */ #define RC_BSR 0x101 /* Board Status Register (read-only) */ #define RC_CTOUT 0x101 /* Clear Timeout (write-only) */ /* Board Status Register */ #define RC_BSR_TOUT 0x08 /* Timeout */ #define RC_BSR_RXINT 0x04 /* Receiver Interrupt */ #define RC_BSR_TXINT 0x02 /* Transmitter Interrupt */ #define RC_BSR_MOINT 0x01 /* Modem Control Interrupt */ /* Interrupt groups */ #define RC_MODEMGRP 0x01 /* Modem interrupt group */ #define RC_RXGRP 0x02 /* Receiver interrupt group */ #define RC_TXGRP 0x04 /* Transmitter interrupt group */ /* Priority Interrupt Level definitions */ #define RC_PILR_MODEM (0x80 | RC_MODEMGRP) #define RC_PILR_RX (0x80 | RC_RXGRP ) #define RC_PILR_TX (0x80 | RC_TXGRP ) Index: head/sys/i386/pci/pci_pir.c =================================================================== --- head/sys/i386/pci/pci_pir.c (revision 344854) +++ head/sys/i386/pci/pci_pir.c (revision 344855) @@ -1,744 +1,744 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi - * Copyright (c) 2004, John Baldwin * All rights reserved. + * Copyright (c) 2004, John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_ISA_INTERRUPTS 16 /* * A link device. Loosely based on the ACPI PCI link device. This doesn't * try to support priorities for different ISA interrupts. 
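[Stepping back to the RC_BRD() macro in rcreg.h above: it converts a baud rate into a CD180 bit-period divisor, rounding to nearest at both divisions. A worked example, assuming CD180_CTICKS is the chip's 16x clock divisor (value 16 per cd180.h; my assumption).]

/*
 *	RC_BRD(9600) = (((9830400 + 4800) / 9600) + 8) / 16
 *	             = (1024 + 8) / 16
 *	             = 64
 * i.e. 9830400 / (16 * 9600) == 64 exactly; the "+ (s)/2" and
 * "+ CD180_CTICKS/2" terms round the result for rates that do not
 * divide the oscillator evenly.
 */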
*/ struct pci_link { TAILQ_ENTRY(pci_link) pl_links; uint8_t pl_id; uint8_t pl_irq; uint16_t pl_irqmask; int pl_references; int pl_routed; }; struct pci_link_lookup { struct pci_link **pci_link_ptr; int bus; int device; int pin; }; struct pci_dev_lookup { uint8_t link; int bus; int device; int pin; }; typedef void pir_entry_handler(struct PIR_entry *entry, struct PIR_intpin* intpin, void *arg); static void pci_print_irqmask(u_int16_t irqs); static int pci_pir_biosroute(int bus, int device, int func, int pin, int irq); static int pci_pir_choose_irq(struct pci_link *pci_link, int irqmask); static void pci_pir_create_links(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_dump_links(void); static struct pci_link *pci_pir_find_link(uint8_t link_id); static void pci_pir_find_link_handler(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_initial_irqs(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_parse(void); static uint8_t pci_pir_search_irq(int bus, int device, int pin); static int pci_pir_valid_irq(struct pci_link *pci_link, int irq); static void pci_pir_walk_table(pir_entry_handler *handler, void *arg); static MALLOC_DEFINE(M_PIR, "$PIR", "$PIR structures"); static struct PIR_table *pci_route_table; static device_t pir_device; static int pci_route_count, pir_bios_irqs, pir_parsed; static TAILQ_HEAD(, pci_link) pci_links; static int pir_interrupt_weight[NUM_ISA_INTERRUPTS]; /* sysctl vars */ SYSCTL_DECL(_hw_pci); /* XXX this likely should live in a header file */ /* IRQs 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15 */ #define PCI_IRQ_OVERRIDE_MASK 0xdef8 static uint32_t pci_irq_override_mask = PCI_IRQ_OVERRIDE_MASK; SYSCTL_INT(_hw_pci, OID_AUTO, irq_override_mask, CTLFLAG_RDTUN, &pci_irq_override_mask, PCI_IRQ_OVERRIDE_MASK, "Mask of allowed irqs to try to route when it has no good clue about\n" "which irqs it should use."); /* * Look for the interrupt routing table. * * We use PCI BIOS's PIR table if it's available. $PIR is the standard way * to do this. Sadly, some machines are not standards conforming and have * _PIR instead. We shrug and cope by looking for both. */ void pci_pir_open(void) { struct PIR_table *pt; uint32_t sigaddr; int i; uint8_t ck, *cv; /* Don't try if we've already found a table. */ if (pci_route_table != NULL) return; /* Look for $PIR and then _PIR. */ sigaddr = bios_sigsearch(0, "$PIR", 4, 16, 0); if (sigaddr == 0) sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0); if (sigaddr == 0) return; /* If we found something, check the checksum and length. */ /* XXX - Use pmap_mapdev()? */ pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); if (pt->pt_header.ph_length <= sizeof(struct PIR_header)) return; for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < (pt->pt_header.ph_length); i++) ck += cv[i]; if (ck != 0) return; /* Ok, we've got a valid table. */ pci_route_table = pt; pci_route_count = (pt->pt_header.ph_length - sizeof(struct PIR_header)) / sizeof(struct PIR_entry); } /* * Find the pci_link structure for a given link ID. */ static struct pci_link * pci_pir_find_link(uint8_t link_id) { struct pci_link *pci_link; TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (pci_link->pl_id == link_id) return (pci_link); } return (NULL); } /* * Find the link device associated with a PCI device in the table. 
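[pci_pir_open() above accepts a table only if every byte of it, checksum field included, sums to zero mod 256 — the standard $PIR validity rule. The check in isolation, as a sketch:]

#include <sys/types.h>

static int
pir_checksum_ok(const uint8_t *tbl, size_t len)
{
	uint8_t ck = 0;

	while (len-- > 0)
		ck += *tbl++;
	return (ck == 0);	/* valid iff the byte sum wraps to zero */
}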
*/ static void pci_pir_find_link_handler(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link_lookup *lookup; lookup = (struct pci_link_lookup *)arg; if (entry->pe_bus == lookup->bus && entry->pe_device == lookup->device && intpin - entry->pe_intpin == lookup->pin) *lookup->pci_link_ptr = pci_pir_find_link(intpin->link); } /* * Check to see if a possible IRQ setting is valid. */ static int pci_pir_valid_irq(struct pci_link *pci_link, int irq) { if (!PCI_INTERRUPT_VALID(irq)) return (0); return (pci_link->pl_irqmask & (1 << irq)); } /* * Walk the $PIR executing the worker function for each valid intpin entry * in the table. The handler is passed a pointer to both the entry and * the intpin in the entry. */ static void pci_pir_walk_table(pir_entry_handler *handler, void *arg) { struct PIR_entry *entry; struct PIR_intpin *intpin; int i, pin; entry = &pci_route_table->pt_entry[0]; for (i = 0; i < pci_route_count; i++, entry++) { intpin = &entry->pe_intpin[0]; for (pin = 0; pin < 4; pin++, intpin++) if (intpin->link != 0) handler(entry, intpin, arg); } } static void pci_pir_create_links(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link *pci_link; pci_link = pci_pir_find_link(intpin->link); if (pci_link != NULL) { pci_link->pl_references++; if (intpin->irqs != pci_link->pl_irqmask) { if (bootverbose) printf( "$PIR: Entry %d.%d.INT%c has different mask for link %#x, merging\n", entry->pe_bus, entry->pe_device, (intpin - entry->pe_intpin) + 'A', pci_link->pl_id); pci_link->pl_irqmask &= intpin->irqs; } } else { pci_link = malloc(sizeof(struct pci_link), M_PIR, M_WAITOK); pci_link->pl_id = intpin->link; pci_link->pl_irqmask = intpin->irqs; pci_link->pl_irq = PCI_INVALID_IRQ; pci_link->pl_references = 1; pci_link->pl_routed = 0; TAILQ_INSERT_TAIL(&pci_links, pci_link, pl_links); } } /* * Look to see if any of the function on the PCI device at bus/device have * an interrupt routed to intpin 'pin' by the BIOS. */ static uint8_t pci_pir_search_irq(int bus, int device, int pin) { uint32_t value; uint8_t func, maxfunc; /* See if we have a valid device at function 0. */ value = pci_cfgregread(bus, device, 0, PCIR_HDRTYPE, 1); if ((value & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) return (PCI_INVALID_IRQ); if (value & PCIM_MFDEV) maxfunc = PCI_FUNCMAX; else maxfunc = 0; /* Scan all possible functions at this device. */ for (func = 0; func <= maxfunc; func++) { value = pci_cfgregread(bus, device, func, PCIR_DEVVENDOR, 4); if (value == 0xffffffff) continue; value = pci_cfgregread(bus, device, func, PCIR_INTPIN, 1); /* * See if it uses the pin in question. Note that the passed * in pin uses 0 for A, .. 3 for D whereas the intpin * register uses 0 for no interrupt, 1 for A, .. 4 for D. */ if (value != pin + 1) continue; value = pci_cfgregread(bus, device, func, PCIR_INTLINE, 1); if (bootverbose) printf( "$PIR: Found matching pin for %d.%d.INT%c at func %d: %d\n", bus, device, pin + 'A', func, value); if (value != PCI_INVALID_IRQ) return (value); } return (PCI_INVALID_IRQ); } /* * Try to initialize IRQ based on this device's IRQ. */ static void pci_pir_initial_irqs(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link *pci_link; uint8_t irq, pin; pin = intpin - entry->pe_intpin; pci_link = pci_pir_find_link(intpin->link); irq = pci_pir_search_irq(entry->pe_bus, entry->pe_device, pin); if (irq == PCI_INVALID_IRQ || irq == pci_link->pl_irq) return; /* Don't trust any BIOS IRQs greater than 15. 
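pci_pir_search_irq() above has to bridge two pin encodings: callers use 0 for INTA through 3 for INTD, while the PCIR_INTPIN register uses 0 for "no interrupt" and 1 through 4 for INTA through INTD. A pair of hypothetical conversion helpers (names are illustrative, not from the driver) that make the off-by-one explicit:

#include <stdint.h>

/* Register encoding: 0 = none, 1 = INTA ... 4 = INTD. */
static inline int
intpin_reg_to_pin(uint8_t reg)
{
	return (reg == 0 ? -1 : reg - 1);	/* -1 = no interrupt */
}

/* Caller encoding: 0 = INTA ... 3 = INTD. */
static inline uint8_t
pin_to_intpin_reg(int pin)
{
	return ((uint8_t)(pin + 1));
}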
*/ if (irq >= NUM_ISA_INTERRUPTS) { printf( "$PIR: Ignoring invalid BIOS IRQ %d from %d.%d.INT%c for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); return; } /* * If we don't have an IRQ for this link yet, then we trust the * BIOS, even if it seems invalid from the $PIR entries. */ if (pci_link->pl_irq == PCI_INVALID_IRQ) { if (!pci_pir_valid_irq(pci_link, irq)) printf( "$PIR: Using invalid BIOS IRQ %d from %d.%d.INT%c for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); pci_link->pl_irq = irq; pci_link->pl_routed = 1; return; } /* * We have an IRQ and it doesn't match the current IRQ for this * link. If the new IRQ is invalid, then warn about it and ignore * it. If the old IRQ is invalid and the new IRQ is valid, then * prefer the new IRQ instead. If both IRQs are valid, then just * use the first one. Note that if we ever get into this situation * we are having to guess which setting the BIOS actually routed. * Perhaps we should just give up instead. */ if (!pci_pir_valid_irq(pci_link, irq)) { printf( "$PIR: BIOS IRQ %d for %d.%d.INT%c is not valid for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); } else if (!pci_pir_valid_irq(pci_link, pci_link->pl_irq)) { printf( "$PIR: Preferring valid BIOS IRQ %d from %d.%d.INT%c for link %#x to IRQ %d\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id, pci_link->pl_irq); pci_link->pl_irq = irq; pci_link->pl_routed = 1; } else printf( "$PIR: BIOS IRQ %d for %d.%d.INT%c does not match link %#x irq %d\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id, pci_link->pl_irq); } /* * Parse $PIR to enumerate link devices and attempt to determine their * initial state. This could perhaps be cleaner if we had drivers for the * various interrupt routers as they could read the initial IRQ for each * link. */ static void pci_pir_parse(void) { char tunable_buffer[64]; struct pci_link *pci_link; int i, irq; /* Only parse once. */ if (pir_parsed) return; pir_parsed = 1; /* Enumerate link devices. */ TAILQ_INIT(&pci_links); pci_pir_walk_table(pci_pir_create_links, NULL); if (bootverbose) { printf("$PIR: Links after initial probe:\n"); pci_pir_dump_links(); } /* * Check to see if the BIOS has already routed any of the links by * checking each device connected to each link to see if it has a * valid IRQ. */ pci_pir_walk_table(pci_pir_initial_irqs, NULL); if (bootverbose) { printf("$PIR: Links after initial IRQ discovery:\n"); pci_pir_dump_links(); } /* * Allow the user to override the IRQ for a given link device. We * allow invalid IRQs to be specified but warn about them. An IRQ * of 255 or 0 clears any preset IRQ. */ i = 0; TAILQ_FOREACH(pci_link, &pci_links, pl_links) { snprintf(tunable_buffer, sizeof(tunable_buffer), "hw.pci.link.%#x.irq", pci_link->pl_id); if (getenv_int(tunable_buffer, &irq) == 0) continue; if (irq == 0) irq = PCI_INVALID_IRQ; if (irq != PCI_INVALID_IRQ && !pci_pir_valid_irq(pci_link, irq) && bootverbose) printf( "$PIR: Warning, IRQ %d for link %#x is not listed as valid\n", irq, pci_link->pl_id); pci_link->pl_routed = 0; pci_link->pl_irq = irq; i = 1; } if (bootverbose && i) { printf("$PIR: Links after tunable overrides:\n"); pci_pir_dump_links(); } /* * Build initial interrupt weights as well as bitmap of "known-good" * IRQs that the BIOS has already used for PCI link devices. 
*/ TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) continue; pir_bios_irqs |= 1 << pci_link->pl_irq; pir_interrupt_weight[pci_link->pl_irq] += pci_link->pl_references; } if (bootverbose) { printf("$PIR: IRQs used by BIOS: "); pci_print_irqmask(pir_bios_irqs); printf("\n"); printf("$PIR: Interrupt Weights:\n[ "); for (i = 0; i < NUM_ISA_INTERRUPTS; i++) printf(" %3d", i); printf(" ]\n[ "); for (i = 0; i < NUM_ISA_INTERRUPTS; i++) printf(" %3d", pir_interrupt_weight[i]); printf(" ]\n"); } } /* * Use the PCI BIOS to route an interrupt for a given device. * * Input: * AX = PCIBIOS_ROUTE_INTERRUPT * BH = bus * BL = device [7:3] / function [2:0] * CH = IRQ * CL = Interrupt Pin (0x0A = A, ... 0x0D = D) */ static int pci_pir_biosroute(int bus, int device, int func, int pin, int irq) { struct bios_regs args; args.eax = PCIBIOS_ROUTE_INTERRUPT; args.ebx = (bus << 8) | (device << 3) | func; args.ecx = (irq << 8) | (0xa + pin); return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))); } /* * Route a PCI interrupt using a link device from the $PIR. */ int pci_pir_route_interrupt(int bus, int device, int func, int pin) { struct pci_link_lookup lookup; struct pci_link *pci_link; int error, irq; if (pci_route_table == NULL) return (PCI_INVALID_IRQ); /* Lookup link device for this PCI device/pin. */ pci_link = NULL; lookup.bus = bus; lookup.device = device; lookup.pin = pin - 1; lookup.pci_link_ptr = &pci_link; pci_pir_walk_table(pci_pir_find_link_handler, &lookup); if (pci_link == NULL) { printf("$PIR: No matching entry for %d.%d.INT%c\n", bus, device, pin - 1 + 'A'); return (PCI_INVALID_IRQ); } /* * Pick a new interrupt if we don't have one already. We look * for an interrupt from several different sets. First, if * this link only has one valid IRQ, use that. Second, we * check the set of PCI only interrupts from the $PIR. Third, * we check the set of known-good interrupts that the BIOS has * already used. Lastly, we check the "all possible valid * IRQs" set. */ if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) { if (pci_link->pl_irqmask != 0 && powerof2(pci_link->pl_irqmask)) irq = ffs(pci_link->pl_irqmask) - 1; else irq = pci_pir_choose_irq(pci_link, pci_route_table->pt_header.ph_pci_irqs); if (!PCI_INTERRUPT_VALID(irq)) irq = pci_pir_choose_irq(pci_link, pir_bios_irqs); if (!PCI_INTERRUPT_VALID(irq)) irq = pci_pir_choose_irq(pci_link, pci_irq_override_mask); if (!PCI_INTERRUPT_VALID(irq)) { if (bootverbose) printf( "$PIR: Failed to route interrupt for %d:%d INT%c\n", bus, device, pin - 1 + 'A'); return (PCI_INVALID_IRQ); } pci_link->pl_irq = irq; } /* Ask the BIOS to route this IRQ if we haven't done so already. */ if (!pci_link->pl_routed) { error = pci_pir_biosroute(bus, device, func, pin - 1, pci_link->pl_irq); /* Ignore errors when routing a unique interrupt. */ if (error && !powerof2(pci_link->pl_irqmask)) { printf("$PIR: ROUTE_INTERRUPT failed.\n"); return (PCI_INVALID_IRQ); } pci_link->pl_routed = 1; /* Ensure the interrupt is set to level/low trigger. */ KASSERT(pir_device != NULL, ("missing pir device")); BUS_CONFIG_INTR(pir_device, pci_link->pl_irq, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); } if (bootverbose) printf("$PIR: %d:%d INT%c routed to irq %d\n", bus, device, pin - 1 + 'A', pci_link->pl_irq); return (pci_link->pl_irq); } /* * Try to pick an interrupt for the specified link from the interrupts * set in the mask. 
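pci_pir_biosroute() above packs its arguments exactly as the calling convention quoted in its comment demands. A worked example for a sample device, bus 0, device 13, function 0, INTA, IRQ 11 (all values illustrative; note the pin is already zero-based at this point, since pci_pir_route_interrupt() passes pin - 1):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int bus = 0, device = 13, func = 0, pin = 0 /* INTA */, irq = 11;
	uint32_t ebx, ecx;

	ebx = (bus << 8) | (device << 3) | func;	/* BH = bus, BL = dev/func */
	ecx = (irq << 8) | (0xa + pin);			/* CH = IRQ, CL = 0x0a + pin */
	printf("ebx=%#06x ecx=%#06x\n", ebx, ecx);	/* prints 0x0068 0x0b0a */
	return (0);
}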
*/ static int pci_pir_choose_irq(struct pci_link *pci_link, int irqmask) { int i, irq, realmask; /* XXX: Need to have a #define of known bad IRQs to also mask out? */ realmask = pci_link->pl_irqmask & irqmask; if (realmask == 0) return (PCI_INVALID_IRQ); /* Find IRQ with lowest weight. */ irq = PCI_INVALID_IRQ; for (i = 0; i < NUM_ISA_INTERRUPTS; i++) { if (!(realmask & 1 << i)) continue; if (irq == PCI_INVALID_IRQ || pir_interrupt_weight[i] < pir_interrupt_weight[irq]) irq = i; } if (bootverbose && PCI_INTERRUPT_VALID(irq)) { printf("$PIR: Found IRQ %d for link %#x from ", irq, pci_link->pl_id); pci_print_irqmask(realmask); printf("\n"); } return (irq); } static void pci_print_irqmask(u_int16_t irqs) { int i, first; if (irqs == 0) { printf("none"); return; } first = 1; for (i = 0; i < 16; i++, irqs >>= 1) if (irqs & 1) { if (!first) printf(" "); else first = 0; printf("%d", i); } } /* * Display link devices. */ static void pci_pir_dump_links(void) { struct pci_link *pci_link; printf("Link IRQ Rtd Ref IRQs\n"); TAILQ_FOREACH(pci_link, &pci_links, pl_links) { printf("%#4x %3d %c %3d ", pci_link->pl_id, pci_link->pl_irq, pci_link->pl_routed ? 'Y' : 'N', pci_link->pl_references); pci_print_irqmask(pci_link->pl_irqmask); printf("\n"); } } /* * See if any interrupts for a given PCI bus are routed in the PIR. Don't * even bother looking if the BIOS doesn't support routing anyways. If we * are probing a PCI-PCI bridge, then require_parse will be true and we should * only succeed if a host-PCI bridge has already attached and parsed the PIR. */ int pci_pir_probe(int bus, int require_parse) { int i; if (pci_route_table == NULL || (require_parse && !pir_parsed)) return (0); for (i = 0; i < pci_route_count; i++) if (pci_route_table->pt_entry[i].pe_bus == bus) return (1); return (0); } /* * The driver for the new-bus pseudo device pir0 for the $PIR table. */ static int pir_probe(device_t dev) { char buf[64]; snprintf(buf, sizeof(buf), "PCI Interrupt Routing Table: %d Entries", pci_route_count); device_set_desc_copy(dev, buf); return (0); } static int pir_attach(device_t dev) { pci_pir_parse(); KASSERT(pir_device == NULL, ("Multiple pir devices")); pir_device = dev; return (0); } static void pir_resume_find_device(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_dev_lookup *pd; pd = (struct pci_dev_lookup *)arg; if (intpin->link != pd->link || pd->bus != -1) return; pd->bus = entry->pe_bus; pd->device = entry->pe_device; pd->pin = intpin - entry->pe_intpin; } static int pir_resume(device_t dev) { struct pci_dev_lookup pd; struct pci_link *pci_link; int error; /* Ask the BIOS to re-route each link that was already routed. 
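pci_pir_choose_irq() above is an arg-min over the permitted mask, with pir_interrupt_weight as the cost function. The same selection as a pure, standalone function (the array size and the invalid-IRQ sentinel are stand-ins for NUM_ISA_INTERRUPTS and PCI_INVALID_IRQ):

#include <stdint.h>

#define NISA		16	/* stand-in for NUM_ISA_INTERRUPTS */
#define INVALID_IRQ	255	/* stand-in for PCI_INVALID_IRQ */

static int
choose_lowest_weight_irq(uint16_t linkmask, uint16_t candidates,
    const int weight[NISA])
{
	uint16_t realmask;
	int i, irq;

	realmask = linkmask & candidates;
	irq = INVALID_IRQ;
	for (i = 0; i < NISA; i++) {
		if ((realmask & (1 << i)) == 0)
			continue;
		if (irq == INVALID_IRQ || weight[i] < weight[irq])
			irq = i;
	}
	return (irq);
}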
*/ TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) { KASSERT(!pci_link->pl_routed, ("link %#x is routed but has invalid PCI IRQ", pci_link->pl_id)); continue; } if (pci_link->pl_routed) { pd.bus = -1; pd.link = pci_link->pl_id; pci_pir_walk_table(pir_resume_find_device, &pd); KASSERT(pd.bus != -1, ("did not find matching entry for link %#x in the $PIR table", pci_link->pl_id)); if (bootverbose) device_printf(dev, "Using %d.%d.INT%c to route link %#x to IRQ %d\n", pd.bus, pd.device, pd.pin + 'A', pci_link->pl_id, pci_link->pl_irq); error = pci_pir_biosroute(pd.bus, pd.device, 0, pd.pin, pci_link->pl_irq); if (error) device_printf(dev, "ROUTE_INTERRUPT on resume for link %#x failed.\n", pci_link->pl_id); } } return (0); } static device_method_t pir_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pir_probe), DEVMETHOD(device_attach, pir_attach), DEVMETHOD(device_resume, pir_resume), { 0, 0 } }; static driver_t pir_driver = { "pir", pir_methods, 1, }; static devclass_t pir_devclass; DRIVER_MODULE(pir, legacy, pir_driver, pir_devclass, 0, 0); Index: head/sys/kern/kern_ktr.c =================================================================== --- head/sys/kern/kern_ktr.c (revision 344854) +++ head/sys/kern/kern_ktr.c (revision 344855) @@ -1,476 +1,475 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module holds the global variables used by KTR and the ktr_tracepoint() * function that does the actual tracing. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktr.h" #include "opt_alq.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifndef KTR_BOOT_ENTRIES #define KTR_BOOT_ENTRIES 1024 #endif #ifndef KTR_ENTRIES #define KTR_ENTRIES 1024 #endif /* Limit the allocations to something manageable. 
*/ #define KTR_ENTRIES_MAX (8 * 1024 * 1024) #ifndef KTR_MASK #define KTR_MASK (0) #endif #ifndef KTR_CPUMASK #define KTR_CPUMASK CPUSET_FSET #endif #ifndef KTR_TIME #define KTR_TIME get_cyclecount() #endif #ifndef KTR_CPU #define KTR_CPU PCPU_GET(cpuid) #endif static MALLOC_DEFINE(M_KTR, "KTR", "KTR"); FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); volatile int ktr_idx = 0; uint64_t ktr_mask = KTR_MASK; uint64_t ktr_compile = KTR_COMPILE; int ktr_entries = KTR_BOOT_ENTRIES; int ktr_version = KTR_VERSION; struct ktr_entry ktr_buf_init[KTR_BOOT_ENTRIES]; struct ktr_entry *ktr_buf = ktr_buf_init; cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); SYSCTL_UQUAD(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD, &ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel"); static int sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS) { char lktr_cpumask_str[CPUSETBUFSIZ]; cpuset_t imask; int error; cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask); error = sysctl_handle_string(oidp, lktr_cpumask_str, sizeof(lktr_cpumask_str), req); if (error != 0 || req->newptr == NULL) return (error); if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1) return (EINVAL); CPU_COPY(&imask, &ktr_cpumask); return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RWTUN | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0, sysctl_debug_ktr_cpumask, "S", "Bitmask of CPUs on which KTR logging is enabled"); static int sysctl_debug_ktr_clear(SYSCTL_HANDLER_ARGS) { int clear, error; clear = 0; error = sysctl_handle_int(oidp, &clear, 0, req); if (error || !req->newptr) return (error); if (clear) { bzero(ktr_buf, sizeof(*ktr_buf) * ktr_entries); ktr_idx = 0; } return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, clear, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_clear, "I", "Clear KTR Buffer"); /* * This is a sysctl proc so that it is serialized as !MPSAFE along with * the other ktr sysctl procs. */ static int sysctl_debug_ktr_mask(SYSCTL_HANDLER_ARGS) { uint64_t mask; int error; mask = ktr_mask; error = sysctl_handle_64(oidp, &mask, 0, req); if (error || !req->newptr) return (error); ktr_mask = mask; return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_U64 | CTLFLAG_RWTUN, 0, 0, sysctl_debug_ktr_mask, "QU", "Bitmask of KTR event classes for which logging is enabled"); #if KTR_ENTRIES > KTR_BOOT_ENTRIES /* * A simplified version of sysctl_debug_ktr_entries. * No need to care about SMP, scheduling, etc. */ static void ktr_entries_initializer(void *dummy __unused) { uint64_t mask; /* Temporarily disable ktr in case malloc() is being traced. 
*/ mask = ktr_mask; ktr_mask = 0; ktr_buf = malloc(sizeof(*ktr_buf) * KTR_ENTRIES, M_KTR, M_WAITOK | M_ZERO); memcpy(ktr_buf, ktr_buf_init + ktr_idx, (KTR_BOOT_ENTRIES - ktr_idx) * sizeof(*ktr_buf)); if (ktr_idx != 0) { memcpy(ktr_buf + KTR_BOOT_ENTRIES - ktr_idx, ktr_buf_init, ktr_idx * sizeof(*ktr_buf)); ktr_idx = KTR_BOOT_ENTRIES; } ktr_entries = KTR_ENTRIES; ktr_mask = mask; } SYSINIT(ktr_entries_initializer, SI_SUB_KMEM, SI_ORDER_ANY, ktr_entries_initializer, NULL); #endif static int sysctl_debug_ktr_entries(SYSCTL_HANDLER_ARGS) { uint64_t mask; int entries, error; struct ktr_entry *buf, *oldbuf; entries = ktr_entries; error = sysctl_handle_int(oidp, &entries, 0, req); if (error || !req->newptr) return (error); if (entries > KTR_ENTRIES_MAX) return (ERANGE); /* Disable ktr temporarily. */ mask = ktr_mask; ktr_mask = 0; /* Wait for threads to go idle. */ if ((error = quiesce_all_cpus("ktrent", PCATCH)) != 0) { ktr_mask = mask; return (error); } if (ktr_buf != ktr_buf_init) oldbuf = ktr_buf; else oldbuf = NULL; /* Allocate a new buffer. */ buf = malloc(sizeof(*buf) * entries, M_KTR, M_WAITOK | M_ZERO); /* Install the new buffer and restart ktr. */ ktr_buf = buf; ktr_entries = entries; ktr_idx = 0; ktr_mask = mask; if (oldbuf != NULL) free(oldbuf, M_KTR); return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, entries, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_entries, "I", "Number of entries in the KTR buffer"); #ifdef KTR_VERBOSE int ktr_verbose = KTR_VERBOSE; TUNABLE_INT("debug.ktr.verbose", &ktr_verbose); SYSCTL_INT(_debug_ktr, OID_AUTO, verbose, CTLFLAG_RW, &ktr_verbose, 0, ""); #endif #ifdef KTR_ALQ struct alq *ktr_alq; char ktr_alq_file[MAXPATHLEN] = "/tmp/ktr.out"; int ktr_alq_cnt = 0; int ktr_alq_depth = KTR_ENTRIES; int ktr_alq_enabled = 0; int ktr_alq_failed = 0; int ktr_alq_max = 0; SYSCTL_INT(_debug_ktr, OID_AUTO, alq_max, CTLFLAG_RW, &ktr_alq_max, 0, "Maximum number of entries to write"); SYSCTL_INT(_debug_ktr, OID_AUTO, alq_cnt, CTLFLAG_RD, &ktr_alq_cnt, 0, "Current number of written entries"); SYSCTL_INT(_debug_ktr, OID_AUTO, alq_failed, CTLFLAG_RD, &ktr_alq_failed, 0, "Number of times we overran the buffer"); SYSCTL_INT(_debug_ktr, OID_AUTO, alq_depth, CTLFLAG_RW, &ktr_alq_depth, 0, "Number of items in the write buffer"); SYSCTL_STRING(_debug_ktr, OID_AUTO, alq_file, CTLFLAG_RW, ktr_alq_file, sizeof(ktr_alq_file), "KTR logging file"); static int sysctl_debug_ktr_alq_enable(SYSCTL_HANDLER_ARGS) { int error; int enable; enable = ktr_alq_enabled; error = sysctl_handle_int(oidp, &enable, 0, req); if (error || !req->newptr) return (error); if (enable) { if (ktr_alq_enabled) return (0); error = alq_open(&ktr_alq, (const char *)ktr_alq_file, req->td->td_ucred, ALQ_DEFAULT_CMODE, sizeof(struct ktr_entry), ktr_alq_depth); if (error == 0) { ktr_alq_cnt = 0; ktr_alq_failed = 0; ktr_alq_enabled = 1; } } else { if (ktr_alq_enabled == 0) return (0); ktr_alq_enabled = 0; alq_close(ktr_alq); ktr_alq = NULL; } return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, alq_enable, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_alq_enable, "I", "Enable KTR logging"); #endif void ktr_tracepoint(uint64_t mask, const char *file, int line, const char *format, u_long arg1, u_long arg2, u_long arg3, u_long arg4, u_long arg5, u_long arg6) { struct ktr_entry *entry; #ifdef KTR_ALQ struct ale *ale = NULL; #endif int newindex, saveindex; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) struct thread *td; #endif int cpu; if (panicstr || kdb_active) return; if ((ktr_mask & mask) == 0 || ktr_buf == NULL) return; 
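The two memcpy() calls in ktr_entries_initializer() above linearize a possibly wrapped ring: the slots from the current index to the end come first, then the wrapped-around prefix. The same unwrap as a generic helper, assuming only a ring of n fixed-size slots whose next-write index is idx:

#include <stddef.h>
#include <string.h>

/* Copy all n ring slots into dst in oldest-to-newest order. */
static void
ring_unwrap(void *dst, const void *ring, size_t n, size_t idx, size_t elsize)
{
	const char *src = ring;
	char *d = dst;

	memcpy(d, src + idx * elsize, (n - idx) * elsize);
	if (idx != 0)
		memcpy(d + (n - idx) * elsize, src, idx * elsize);
}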
cpu = KTR_CPU; if (!CPU_ISSET(cpu, &ktr_cpumask)) return; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td = curthread; if (td->td_pflags & TDP_INKTR) return; td->td_pflags |= TDP_INKTR; #endif #ifdef KTR_ALQ if (ktr_alq_enabled) { if (td->td_critnest == 0 && (TD_IS_IDLETHREAD(td)) == 0 && td != ald_thread) { if (ktr_alq_max && ktr_alq_cnt > ktr_alq_max) goto done; if ((ale = alq_get(ktr_alq, ALQ_NOWAIT)) == NULL) { ktr_alq_failed++; goto done; } ktr_alq_cnt++; entry = (struct ktr_entry *)ale->ae_data; } else { goto done; } } else #endif { do { saveindex = ktr_idx; newindex = (saveindex + 1) % ktr_entries; } while (atomic_cmpset_rel_int(&ktr_idx, saveindex, newindex) == 0); entry = &ktr_buf[saveindex]; } entry->ktr_timestamp = KTR_TIME; entry->ktr_cpu = cpu; entry->ktr_thread = curthread; if (file != NULL) while (strncmp(file, "../", 3) == 0) file += 3; entry->ktr_file = file; entry->ktr_line = line; #ifdef KTR_VERBOSE if (ktr_verbose) { #ifdef SMP printf("cpu%d ", cpu); #endif if (ktr_verbose > 1) { printf("%s.%d\t", entry->ktr_file, entry->ktr_line); } printf(format, arg1, arg2, arg3, arg4, arg5, arg6); printf("\n"); } #endif entry->ktr_desc = format; entry->ktr_parms[0] = arg1; entry->ktr_parms[1] = arg2; entry->ktr_parms[2] = arg3; entry->ktr_parms[3] = arg4; entry->ktr_parms[4] = arg5; entry->ktr_parms[5] = arg6; #ifdef KTR_ALQ if (ktr_alq_enabled && ale) alq_post(ktr_alq, ale); done: #endif #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td->td_pflags &= ~TDP_INKTR; #endif } #ifdef DDB struct tstate { int cur; int first; }; static struct tstate tstate; static int db_ktr_verbose; static int db_mach_vtrace(void); DB_SHOW_COMMAND(ktr, db_ktr_all) { tstate.cur = (ktr_idx - 1) % ktr_entries; tstate.first = -1; db_ktr_verbose = 0; db_ktr_verbose |= (strchr(modif, 'v') != NULL) ? 2 : 0; db_ktr_verbose |= (strchr(modif, 'V') != NULL) ? 1 : 0; /* just timestamp please */ if (strchr(modif, 'a') != NULL) { db_disable_pager(); while (cncheckc() == -1) if (db_mach_vtrace() == 0) break; } else { while (!db_pager_quit) if (db_mach_vtrace() == 0) break; } } static int db_mach_vtrace(void) { struct ktr_entry *kp; if (tstate.cur == tstate.first || ktr_buf == NULL) { db_printf("--- End of trace buffer ---\n"); return (0); } kp = &ktr_buf[tstate.cur]; /* Skip over unused entries. */ if (kp->ktr_desc == NULL) { db_printf("--- End of trace buffer ---\n"); return (0); } db_printf("%d (%p", tstate.cur, kp->ktr_thread); #ifdef SMP db_printf(":cpu%d", kp->ktr_cpu); #endif db_printf(")"); if (db_ktr_verbose >= 1) { db_printf(" %10.10lld", (long long)kp->ktr_timestamp); } if (db_ktr_verbose >= 2) { db_printf(" %s.%d", kp->ktr_file, kp->ktr_line); } db_printf(": "); db_printf(kp->ktr_desc, kp->ktr_parms[0], kp->ktr_parms[1], kp->ktr_parms[2], kp->ktr_parms[3], kp->ktr_parms[4], kp->ktr_parms[5]); db_printf("\n"); if (tstate.first == -1) tstate.first = tstate.cur; if (--tstate.cur < 0) tstate.cur = ktr_entries - 1; return (1); } #endif /* DDB */ Index: head/sys/kern/kern_rwlock.c =================================================================== --- head/sys/kern/kern_rwlock.c (revision 344854) +++ head/sys/kern/kern_rwlock.c (revision 344855) @@ -1,1557 +1,1556 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* have a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; #ifdef ADAPTIVE_RWLOCKS static int __read_frequently rowner_retries; static int __read_frequently rowner_loops; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); static struct lock_delay_config __read_frequently rw_delay; SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, 0, ""); static void rw_lock_delay_init(void *arg __unused) { lock_delay_default_init(&rw_delay); rowner_retries = 10; rowner_loops = max(10000, rw_delay.max); } LOCK_DELAY_SYSINIT(rw_lock_delay_init); #endif /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define lv_rw_wowner(v) \ ((v) & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((v))) #define rw_wowner(rw) lv_rw_wowner(RW_READ_VALUE(rw)) /* * Returns if a write owner is recursed. 
Write ownership is not assured * here and should be previously checked. */ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread helds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner(). */ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE | RW_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; if (opts & RW_NEW) flags |= LO_NEW; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args; args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t tid, v; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); tid = (uintptr_t)curthread; v = RW_UNLOCKED; if (!_rw_write_lock_fetch(rw, &v, tid)) _rw_wlock_hard(rw, v, file, line); else LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t tid, v; int rval; bool recursed; td = curthread; tid = 
(uintptr_t)td; if (SCHEDULER_STOPPED_TD(td)) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); rval = 1; recursed = false; v = RW_UNLOCKED; for (;;) { if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) break; if (v == RW_UNLOCKED) continue; if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) { rw->rw_recurse++; atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); break; } rval = 0; break; } LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!recursed) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG)); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); #ifdef LOCK_PROFILING _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line); #else __rw_wunlock(rw, curthread, file, line); #endif TD_LOCKS_DEC(curthread); } /* * Determines whether a new reader can acquire a lock. Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ static bool __always_inline __rw_can_read(struct thread *td, uintptr_t v, bool fp) { if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == RW_LOCK_READ) return (true); if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ)) return (true); return (false); } static bool __always_inline __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp LOCK_FILE_LINE_ARG_DEF) { /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. 
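That encoding is also why lv_rw_wowner() earlier can branch on a single bit: with RW_LOCK_READ set, the word holds flags plus a reader count; otherwise it is the owner's thread pointer with the low flag bits masked off. A toy decoder under an assumed layout (the real masks live in sys/rwlock.h; the bit values here are illustrative only and are reused by the later sketches):

#include <stdint.h>
#include <stdio.h>

#define LOCK_READ	0x01u	/* assumed flag bit: word is in read mode */
#define FLAG_MASK	0x1fu	/* assumed: low five bits reserved for flags */
#define READER_SHIFT	5	/* assumed position of the reader count */

static void
decode_lock_word(uintptr_t v)
{
	if (v & LOCK_READ)
		printf("read-locked: %ju readers, flags %#jx\n",
		    (uintmax_t)(v >> READER_SHIFT), (uintmax_t)(v & FLAG_MASK));
	else
		printf("write-locked: owner %p, flags %#jx\n",
		    (void *)(v & ~(uintptr_t)FLAG_MASK),
		    (uintmax_t)(v & FLAG_MASK));
}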
*/ while (__rw_can_read(td, *vp, fp)) { if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp, *vp + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)*vp, (void *)(*vp + RW_ONE_READER)); td->td_rw_rlocks++; return (true); } } return (false); } static void __noinline __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { struct turnstile *ts; struct thread *owner; #ifdef ADAPTIVE_RWLOCKS int spintries = 0; int i, n; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) uintptr_t state = 0; int doing_lockprof = 0; #endif #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) goto out_lockstat; doing_lockprof = 1; all_time -= lockstat_nsecs(&rw->lock_object); state = v; } #endif #ifdef LOCK_PROFILING doing_lockprof = 1; state = v; #endif if (SCHEDULER_STOPPED()) return; #if defined(ADAPTIVE_RWLOCKS) lock_delay_arg_init(&lda, &rw_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init(&lda, NULL); #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); for (;;) { if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) break; #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); do { lock_delay(&lda); v = RW_READ_VALUE(rw); owner = lv_rw_wowner(v); } while (owner != NULL && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else { if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) { MPASS(!__rw_can_read(td, v, false)); lock_delay_spin(2); v = RW_READ_VALUE(rw); continue; } if (spintries < rowner_retries) { spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); n = RW_READERS(v); for (i = 0; i < rowner_loops; i += n) { lock_delay_spin(n); v = RW_READ_VALUE(rw); if (!(v & RW_LOCK_READ)) break; n = RW_READERS(v); if (n == 0) break; if (__rw_can_read(td, v, false)) break; } #ifdef KDTRACE_HOOKS lda.spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i < rowner_loops) continue; } } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. 
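The loop in __rw_rlock_try() above is a fetch-compare-swap read acquire: while the word stays in read mode, try to bump the reader count; a failed CAS reloads the observed value and the loop revalidates. A userland rendering with C11 atomics in the toy encoding of the previous sketch (the kernel version also refuses when writer-waiter or spinner bits are set; this sketch checks only the read bit for brevity):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_READ	0x01u	/* toy encoding from the sketch above */
#define ONE_READER	0x20u	/* first bit past the assumed flag field */

static bool
rlock_try(_Atomic uintptr_t *lockp)
{
	uintptr_t v = atomic_load_explicit(lockp, memory_order_relaxed);

	/* Readers may pile on only while the word stays in read mode. */
	while (v & LOCK_READ) {
		if (atomic_compare_exchange_weak_explicit(lockp, &v,
		    v + ONE_READER, memory_order_acquire,
		    memory_order_relaxed))
			return (true);
		/* The failed CAS reloaded v; reevaluate and retry. */
	}
	return (false);
}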
*/ v = RW_READ_VALUE(rw); retry_ts: if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) || __rw_can_read(td, v, false)) { turnstile_cancel(ts); continue; } owner = lv_rw_wowner(v); #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (owner != NULL) { if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!__rw_can_read(td, v, false)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, v | RW_LOCK_READ_WAITERS)) goto retry_ts; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif MPASS(owner == rw_owner(rw)); turnstile_wait(ts, owner, TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); v = RW_READ_VALUE(rw); } #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); out_lockstat: #endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. 
*/ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_READER); } void __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t v; td = curthread; KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) || !TD_IS_IDLETHREAD(td), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", td, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != td, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); v = RW_READ_VALUE(rw); if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG))) __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG); else lock_profile_obtain_lock_success(&rw->lock_object, 0, 0, file, line); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); TD_LOCKS_INC(curthread); } void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); __rw_rlock_int(rw LOCK_FILE_LINE_ARG); } int __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); x = rw->rw_lock; for (;;) { KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG)); } static bool __always_inline __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp) { for (;;) { if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) { if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp, *vp - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)*vp, (void *)(*vp - RW_ONE_READER)); td->td_rw_rlocks--; return (true); } continue; } break; } return (false); } static void __noinline __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { struct turnstile *ts; uintptr_t setv, queue; if (SCHEDULER_STOPPED()) return; if (__rw_runlock_try(rw, td, &v)) goto out_lockstat; /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. */ turnstile_chain_lock(&rw->lock_object); v = RW_READ_VALUE(rw); for (;;) { if (__rw_runlock_try(rw, td, &v)) break; MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in a unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wakeup actually runs and have the new thread * "steal" the lock. 
For now it's a lot simpler to just * wakeup all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ setv = RW_UNLOCKED; queue = TS_SHARED_QUEUE; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; setv |= (v & RW_LOCK_READ_WAITERS); } setv |= (v & RW_LOCK_WRITE_SPINNER); if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv)) continue; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts); td->td_rw_rlocks--; break; } turnstile_chain_unlock(&rw->lock_object); out_lockstat: LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); } void _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t v; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); td = curthread; v = RW_READ_VALUE(rw); if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || !__rw_runlock_try(rw, td, &v))) __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG); else lock_profile_release_lock(&rw->lock_object); TD_LOCKS_DEC(curthread); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG); } #ifdef ADAPTIVE_RWLOCKS static inline void rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work) { if (v & RW_LOCK_WRITE_SPINNER) return; if (*in_critical) { critical_exit(); *in_critical = false; (*extra_work)--; } } #else #define rw_drop_critical(v, in_critical, extra_work) do { } while (0) #endif /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. 
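The fast path that must have failed to get here is a single CAS from the unlocked word to the caller's thread pointer, as done via _rw_write_lock_fetch() in _rw_wlock_cookie() earlier. A C11 sketch in the same toy encoding (tid must have the low flag bits clear, as an aligned thread pointer does):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define UNLOCKED	0x01u	/* toy encoding: read mode, zero readers */

static bool
wlock_try(_Atomic uintptr_t *lockp, uintptr_t tid)
{
	uintptr_t v = UNLOCKED;

	/* On failure v holds the observed word, which a hard path such as
	 * __rw_wlock_hard() below would then inspect. */
	return (atomic_compare_exchange_strong_explicit(lockp, &v, tid,
	    memory_order_acquire, memory_order_relaxed));
}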
*/ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { uintptr_t tid; struct rwlock *rw; struct turnstile *ts; struct thread *owner; #ifdef ADAPTIVE_RWLOCKS int spintries = 0; int i, n; enum { READERS, WRITER } sleep_reason = READERS; bool in_critical = false; #endif uintptr_t setv; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) uintptr_t state = 0; int doing_lockprof = 0; #endif int extra_work = 0; tid = (uintptr_t)curthread; rw = rwlock2rw(c); #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { while (v == RW_UNLOCKED) { if (_rw_write_lock_fetch(rw, &v, tid)) goto out_lockstat; } extra_work = 1; doing_lockprof = 1; all_time -= lockstat_nsecs(&rw->lock_object); state = v; } #endif #ifdef LOCK_PROFILING extra_work = 1; doing_lockprof = 1; state = v; #endif if (SCHEDULER_STOPPED()) return; #if defined(ADAPTIVE_RWLOCKS) lock_delay_arg_init(&lda, &rw_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init(&lda, NULL); #endif if (__predict_false(v == RW_UNLOCKED)) v = RW_READ_VALUE(rw); if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); for (;;) { if (v == RW_UNLOCKED) { if (_rw_write_lock_fetch(rw, &v, tid)) break; continue; } #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_RWLOCKS if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) { if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) break; continue; } /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
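Both adaptive paths lean on lock_delay(), which burns a growing number of spins between polls of the lock word, bounded by the rw_delay.base and rw_delay.max knobs earlier in the file. A minimal userland model of that backoff, with a compiler barrier standing in for cpu_spinwait() (the exact growth policy is an assumption; this sketch simply doubles up to the cap, and callers start with delay = base):

static inline void
spin_once(void)
{
	__asm__ volatile("" ::: "memory");	/* stand-in for cpu_spinwait() */
}

struct delay_state {
	unsigned delay;		/* spins to burn on the next call */
	unsigned max;		/* cap, cf. rw_delay.max */
};

static void
delay_step(struct delay_state *s)
{
	unsigned i;

	for (i = 0; i < s->delay; i++)
		spin_once();
	/* Double the next wait, capped at the configured maximum. */
	if (s->delay < s->max) {
		s->delay <<= 1;
		if (s->delay > s->max)
			s->delay = s->max;
	}
}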
*/ if (!(v & RW_LOCK_READ)) { rw_drop_critical(v, &in_critical, &extra_work); sleep_reason = WRITER; owner = lv_rw_wowner(v); if (!TD_IS_RUNNING(owner)) goto ts; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); do { lock_delay(&lda); v = RW_READ_VALUE(rw); owner = lv_rw_wowner(v); } while (owner != NULL && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } else if (RW_READERS(v) > 0) { sleep_reason = READERS; if (spintries == rowner_retries) goto ts; if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!in_critical) { critical_enter(); in_critical = true; extra_work++; } if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, v | RW_LOCK_WRITE_SPINNER)) { critical_exit(); in_critical = false; extra_work--; continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); n = RW_READERS(v); for (i = 0; i < rowner_loops; i += n) { lock_delay_spin(n); v = RW_READ_VALUE(rw); if (!(v & RW_LOCK_WRITE_SPINNER)) break; if (!(v & RW_LOCK_READ)) break; n = RW_READERS(v); if (n == 0) break; } #ifdef KDTRACE_HOOKS lda.spin_cnt += i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i < rowner_loops) continue; } ts: #endif ts = turnstile_trywait(&rw->lock_object); v = RW_READ_VALUE(rw); retry_ts: owner = lv_rw_wowner(v); #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (owner != NULL) { if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); rw_drop_critical(v, &in_critical, &extra_work); continue; } } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) { turnstile_cancel(ts); rw_drop_critical(v, &in_critical, &extra_work); continue; } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. */ setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~setv) == RW_UNLOCKED) { setv &= ~RW_LOCK_WRITE_SPINNER; if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) { if (setv) turnstile_claim(ts); else turnstile_cancel(ts); break; } goto retry_ts; } #ifdef ADAPTIVE_RWLOCKS if (in_critical) { if ((v & RW_LOCK_WRITE_SPINNER) || !((v & RW_LOCK_WRITE_WAITERS))) { setv = v & ~RW_LOCK_WRITE_SPINNER; setv |= RW_LOCK_WRITE_WAITERS; if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv)) goto retry_ts; } critical_exit(); in_critical = false; extra_work--; } else { #endif /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, v | RW_LOCK_WRITE_WAITERS)) goto retry_ts; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } #ifdef ADAPTIVE_RWLOCKS } #endif /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. 
*/ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif MPASS(owner == rw_owner(rw)); turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif v = RW_READ_VALUE(rw); } if (__predict_true(!extra_work)) return; #ifdef ADAPTIVE_RWLOCKS if (in_critical) critical_exit(); #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); out_lockstat: #endif LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_WRITER); } /* * This function is called if lockstat is active or the first try at releasing * a write lock failed. The latter means that the lock is recursed or one of * the 2 waiter bits must be set indicating that at least one thread is waiting * on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, setv; int queue; tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (__predict_false(v == tid)) v = RW_READ_VALUE(rw); if (v & RW_LOCK_WRITER_RECURSED) { if (--(rw->rw_recurse) == 0) atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER); if (v == tid && _rw_write_unlock(rw, tid)) return; KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. */ setv = RW_UNLOCKED; v = RW_READ_VALUE(rw); queue = TS_SHARED_QUEUE; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; setv |= (v & RW_LOCK_READ_WAITERS); } atomic_store_rel_ptr(&rw->rw_lock, setv); /* Wake up all waiters for the specific queue. 
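The queue selection just made in __rw_wunlock_hard() is a pure function of the waiter bits as the code reads: the exclusive queue is woken whenever writers are waiting, and in that case the read-waiters bit is preserved in the value written back. A side-effect-free restatement in the toy encoding used above:

#include <stdbool.h>
#include <stdint.h>

#define UNLOCKED	0x01u	/* stand-in for RW_UNLOCKED */
#define READ_WAITERS	0x02u	/* stand-in for RW_LOCK_READ_WAITERS */
#define WRITE_WAITERS	0x04u	/* stand-in for RW_LOCK_WRITE_WAITERS */

/* Returns true to wake the exclusive (writer) queue; *setv is the new word. */
static bool
wunlock_pick_queue(uintptr_t v, uintptr_t *setv)
{
	*setv = UNLOCKED;
	if (v & WRITE_WAITERS) {
		*setv |= (v & READ_WAITERS);	/* readers keep waiting */
		return (true);
	}
	return (false);
}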
*/ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { uintptr_t v, setv, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. */ tid = (uintptr_t)curthread; success = 0; v = RW_READ_VALUE(rw); for (;;) { if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. */ ts = turnstile_trywait(&rw->lock_object); v = RW_READ_VALUE(rw); retry_ts: if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ setv = tid | (v & RW_LOCK_WAITERS); success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv); if (success) { if (v & RW_LOCK_WAITERS) turnstile_claim(ts); else turnstile_cancel(ts); break; } goto retry_ts; } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(rw__upgrade, rw); } return (success); } int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG)); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. 
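The no-waiters fast case of __rw_try_upgrade_int() above is a single CAS from "exactly one reader, nothing else" to the caller's thread pointer. In the toy encoding of the earlier sketches (read bit 0x01, one reader 0x20), that is:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_READ	0x01u	/* toy encoding, as above */
#define ONE_READER	0x20u

static bool
try_upgrade_fast(_Atomic uintptr_t *lockp, uintptr_t tid)
{
	/* Exactly one reader and no waiter or flag bits beyond LOCK_READ. */
	uintptr_t expected = LOCK_READ | ONE_READER;

	return (atomic_compare_exchange_strong_explicit(lockp, &expected,
	    tid, memory_order_acquire, memory_order_relaxed));
}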
*/ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(rw__downgrade, rw); } void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); __rw_downgrade_int(rw LOCK_FILE_LINE_ARG); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. */ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. */ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. 
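
Downgrading hinges on which waiter bits may be dropped: the read-waiters bit can go only when the readers are actually about to be broadcast, which requires that no writer is queued ahead of them. A small model of the word computation and the wake decision, again with stand-in flag values rather than the kernel's:

#include <assert.h>
#include <stdint.h>

#define	M_READ_WAITERS	((uintptr_t)0x1)
#define	M_WRITE_WAITERS	((uintptr_t)0x2)
#define	M_ONE_READER	((uintptr_t)0x10)	/* stands in for RW_READERS_LOCK(1) */

static uintptr_t
downgrade_word(uintptr_t waiters, int *wake_readers)
{
	int rwait = (waiters & M_READ_WAITERS) != 0;
	int wwait = (waiters & M_WRITE_WAITERS) != 0;

	*wake_readers = rwait && !wwait;
	if (!wwait)
		waiters &= ~M_READ_WAITERS;	/* woken readers no longer wait */
	return (M_ONE_READER | waiters);
}

int
main(void)
{
	int wake;

	/* A queued writer keeps everyone waiting and both bits intact. */
	assert(downgrade_word(M_READ_WAITERS | M_WRITE_WAITERS, &wake) ==
	    (M_ONE_READER | M_READ_WAITERS | M_WRITE_WAITERS) && !wake);
	/* Readers only: broadcast them and clear their bit. */
	assert(downgrade_word(M_READ_WAITERS, &wake) ==
	    M_ONE_READER && wake);
	return (0);
}
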
*/ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: head/sys/kern/subr_lock.c =================================================================== --- head/sys/kern/subr_lock.c (revision 344854) +++ head/sys/kern/subr_lock.c (revision 344855) @@ -1,704 +1,703 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module holds the global variables and functions used to maintain * lock_object structures. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_mprof.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include SDT_PROVIDER_DEFINE(lock); SDT_PROBE_DEFINE1(lock, , , starvation, "u_int"); CTASSERT(LOCK_CLASS_MAX == 15); struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { &lock_class_mtx_spin, &lock_class_mtx_sleep, &lock_class_sx, &lock_class_rm, &lock_class_rm_sleepable, &lock_class_rw, &lock_class_lockmgr, }; void lock_init(struct lock_object *lock, struct lock_class *class, const char *name, const char *type, int flags) { int i; /* Check for double-init and zero object. */ KASSERT(flags & LO_NEW || !lock_initialized(lock), ("lock \"%s\" %p already initialized", name, lock)); /* Look up lock class to find its index. */ for (i = 0; i < LOCK_CLASS_MAX; i++) if (lock_classes[i] == class) { lock->lo_flags = i << LO_CLASSSHIFT; break; } KASSERT(i < LOCK_CLASS_MAX, ("unknown lock class %p", class)); /* Initialize the lock object. */ lock->lo_name = name; lock->lo_flags |= flags | LO_INITIALIZED; LOCK_LOG_INIT(lock, 0); WITNESS_INIT(lock, (type != NULL) ? type : name); } void lock_destroy(struct lock_object *lock) { KASSERT(lock_initialized(lock), ("lock %p is not initialized", lock)); WITNESS_DESTROY(lock); LOCK_LOG_DESTROY(lock, 0); lock->lo_flags &= ~LO_INITIALIZED; } static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); static SYSCTL_NODE(_debug_lock, OID_AUTO, delay, CTLFLAG_RD, NULL, "lock delay"); static u_int __read_mostly starvation_limit = 131072; SYSCTL_INT(_debug_lock_delay, OID_AUTO, starvation_limit, CTLFLAG_RW, &starvation_limit, 0, ""); static u_int __read_mostly restrict_starvation = 0; SYSCTL_INT(_debug_lock_delay, OID_AUTO, restrict_starvation, CTLFLAG_RW, &restrict_starvation, 0, ""); void lock_delay(struct lock_delay_arg *la) { struct lock_delay_config *lc = la->config; u_int i; la->delay <<= 1; if (__predict_false(la->delay > lc->max)) la->delay = lc->max; for (i = la->delay; i > 0; i--) cpu_spinwait(); la->spin_cnt += la->delay; if (__predict_false(la->spin_cnt > starvation_limit)) { SDT_PROBE1(lock, , , starvation, la->delay); if (restrict_starvation) la->delay = lc->base; } } static u_int lock_roundup_2(u_int val) { u_int res; for (res = 1; res <= val; res <<= 1) continue; return (res); } void lock_delay_default_init(struct lock_delay_config *lc) { lc->base = 1; lc->max = lock_roundup_2(mp_ncpus) * 256; if (lc->max > 32678) lc->max = 32678; } #ifdef DDB DB_SHOW_COMMAND(lock, db_show_lock) { struct lock_object *lock; struct lock_class *class; if (!have_addr) return; lock = (struct lock_object *)addr; if (LO_CLASSINDEX(lock) > LOCK_CLASS_MAX) { db_printf("Unknown lock class: %d\n", LO_CLASSINDEX(lock)); return; } class = LOCK_CLASS(lock); db_printf(" class: %s\n", class->lc_name); db_printf(" name: %s\n", lock->lo_name); class->lc_ddb_show(lock); } #endif #ifdef LOCK_PROFILING /* * One object per-thread for each lock the thread owns. Tracks individual * lock instances. */ struct lock_profile_object { LIST_ENTRY(lock_profile_object) lpo_link; struct lock_object *lpo_obj; const char *lpo_file; int lpo_line; uint16_t lpo_ref; uint16_t lpo_cnt; uint64_t lpo_acqtime; uint64_t lpo_waittime; u_int lpo_contest_locking; }; /* * One lock_prof for each (file, line, lock object) triple. 
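
lock_delay() implements capped exponential backoff: the spin window doubles on every invocation, is clamped to the class maximum that lock_delay_default_init() derives from the CPU count, and the accumulated spin count can trip the starvation limit. A userspace sketch of one delay step; cpu_pause() stands in for cpu_spinwait(), and the reset is unconditional here where the kernel also requires restrict_starvation and fires an SDT probe:

struct delay_cfg { unsigned base, max; };
struct delay_arg { struct delay_cfg *cfg; unsigned delay, spin_cnt; };

static void
cpu_pause(void)
{
	/* stand-in for cpu_spinwait(), e.g. the PAUSE instruction on x86 */
}

static void
delay_once(struct delay_arg *la, unsigned starvation_limit)
{
	unsigned i;

	la->delay <<= 1;			/* exponential growth */
	if (la->delay > la->cfg->max)
		la->delay = la->cfg->max;	/* clamp to the class max */
	for (i = la->delay; i > 0; i--)
		cpu_pause();
	la->spin_cnt += la->delay;
	if (la->spin_cnt > starvation_limit)
		la->delay = la->cfg->base;	/* back off to let others in */
}

int
main(void)
{
	struct delay_cfg cfg = { 1, 1024 };
	struct delay_arg la = { &cfg, cfg.base, 0 };
	int i;

	for (i = 0; i < 1000; i++)
		delay_once(&la, 131072);	/* the default limit above */
	return (0);
}
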
*/ struct lock_prof { SLIST_ENTRY(lock_prof) link; struct lock_class *class; const char *file; const char *name; int line; int ticks; uintmax_t cnt_wait_max; uintmax_t cnt_max; uintmax_t cnt_tot; uintmax_t cnt_wait; uintmax_t cnt_cur; uintmax_t cnt_contest_locking; }; SLIST_HEAD(lphead, lock_prof); #define LPROF_HASH_SIZE 4096 #define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1) #define LPROF_CACHE_SIZE 4096 /* * Array of objects and profs for each type of object for each cpu. Spinlocks * are handled separately because a thread may be preempted and acquire a * spinlock while in the lock profiling code of a non-spinlock. In this way * we only need a critical section to protect the per-cpu lists. */ struct lock_prof_type { struct lphead lpt_lpalloc; struct lpohead lpt_lpoalloc; struct lphead lpt_hash[LPROF_HASH_SIZE]; struct lock_prof lpt_prof[LPROF_CACHE_SIZE]; struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE]; }; struct lock_prof_cpu { struct lock_prof_type lpc_types[2]; /* One for spin one for other. */ }; struct lock_prof_cpu *lp_cpu[MAXCPU]; volatile int __read_mostly lock_prof_enable; static volatile int lock_prof_resetting; #define LPROF_SBUF_SIZE 256 static int lock_prof_rejected; static int lock_prof_skipspin; static int lock_prof_skipcount; #ifndef USE_CPU_NANOSECONDS uint64_t nanoseconds(void) { struct bintime bt; uint64_t ns; binuptime(&bt); /* From bintime2timespec */ ns = bt.sec * (uint64_t)1000000000; ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32; return (ns); } #endif static void lock_prof_init_type(struct lock_prof_type *type) { int i; SLIST_INIT(&type->lpt_lpalloc); LIST_INIT(&type->lpt_lpoalloc); for (i = 0; i < LPROF_CACHE_SIZE; i++) { SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i], link); LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i], lpo_link); } } static void lock_prof_init(void *arg) { int cpu; for (cpu = 0; cpu <= mp_maxid; cpu++) { lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF, M_WAITOK | M_ZERO); lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]); lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]); } } SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL); static void lock_prof_reset_wait(void) { /* * Spin relinquishing our cpu so that quiesce_all_cpus may * complete. */ while (lock_prof_resetting) sched_relinquish(curthread); } static void lock_prof_reset(void) { struct lock_prof_cpu *lpc; int enabled, i, cpu; /* * We not only race with acquiring and releasing locks but also * thread exit. To be certain that threads exit without valid head * pointers they must see resetting set before enabled is cleared. * Otherwise a lock may not be removed from a per-thread list due * to disabled being set but not wait for reset() to remove it below. */ atomic_store_rel_int(&lock_prof_resetting, 1); enabled = lock_prof_enable; lock_prof_enable = 0; quiesce_all_cpus("profreset", 0); /* * Some objects may have migrated between CPUs. Clear all links * before we zero the structures. Some items may still be linked * into per-thread lists as well. 
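
nanoseconds() converts a struct bintime, whose fraction is a 64-bit binary fixed-point value in units of 2^-64 seconds, by scaling the top 32 fraction bits: ns = frac_hi * 10^9 / 2^32. A quick standalone check of that identity with an exact half second:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint64_t frac = UINT64_C(1) << 63;	/* exactly 0.5 s in 2^-64 units */
	uint64_t ns;

	ns = (UINT64_C(1000000000) * (uint32_t)(frac >> 32)) >> 32;
	assert(ns == 500000000);		/* 0.5e9 ns, no rounding error */
	return (0);
}
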
*/ for (cpu = 0; cpu <= mp_maxid; cpu++) { lpc = lp_cpu[cpu]; for (i = 0; i < LPROF_CACHE_SIZE; i++) { LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link); LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link); } } for (cpu = 0; cpu <= mp_maxid; cpu++) { lpc = lp_cpu[cpu]; bzero(lpc, sizeof(*lpc)); lock_prof_init_type(&lpc->lpc_types[0]); lock_prof_init_type(&lpc->lpc_types[1]); } atomic_store_rel_int(&lock_prof_resetting, 0); lock_prof_enable = enabled; } static void lock_prof_output(struct lock_prof *lp, struct sbuf *sb) { const char *p; for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3); sbuf_printf(sb, "%8ju %9ju %11ju %11ju %11ju %6ju %6ju %2ju %6ju %s:%d (%s:%s)\n", lp->cnt_max / 1000, lp->cnt_wait_max / 1000, lp->cnt_tot / 1000, lp->cnt_wait / 1000, lp->cnt_cur, lp->cnt_cur == 0 ? (uintmax_t)0 : lp->cnt_tot / (lp->cnt_cur * 1000), lp->cnt_cur == 0 ? (uintmax_t)0 : lp->cnt_wait / (lp->cnt_cur * 1000), (uintmax_t)0, lp->cnt_contest_locking, p, lp->line, lp->class->lc_name, lp->name); } static void lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash, int spin, int t) { struct lock_prof_type *type; struct lock_prof *l; int cpu; dst->file = match->file; dst->line = match->line; dst->class = match->class; dst->name = match->name; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (lp_cpu[cpu] == NULL) continue; type = &lp_cpu[cpu]->lpc_types[spin]; SLIST_FOREACH(l, &type->lpt_hash[hash], link) { if (l->ticks == t) continue; if (l->file != match->file || l->line != match->line || l->name != match->name) continue; l->ticks = t; if (l->cnt_max > dst->cnt_max) dst->cnt_max = l->cnt_max; if (l->cnt_wait_max > dst->cnt_wait_max) dst->cnt_wait_max = l->cnt_wait_max; dst->cnt_tot += l->cnt_tot; dst->cnt_wait += l->cnt_wait; dst->cnt_cur += l->cnt_cur; dst->cnt_contest_locking += l->cnt_contest_locking; } } } static void lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin, int t) { struct lock_prof *l; int i; for (i = 0; i < LPROF_HASH_SIZE; ++i) { SLIST_FOREACH(l, &type->lpt_hash[i], link) { struct lock_prof lp = {}; if (l->ticks == t) continue; lock_prof_sum(l, &lp, i, spin, t); lock_prof_output(&lp, sb); } } } static int dump_lock_prof_stats(SYSCTL_HANDLER_ARGS) { struct sbuf *sb; int error, cpu, t; int enabled; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, LPROF_SBUF_SIZE, req); sbuf_printf(sb, "\n%8s %9s %11s %11s %11s %6s %6s %2s %6s %s\n", "max", "wait_max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name"); enabled = lock_prof_enable; lock_prof_enable = 0; quiesce_all_cpus("profstat", 0); t = ticks; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (lp_cpu[cpu] == NULL) continue; lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t); lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t); } lock_prof_enable = enabled; error = sbuf_finish(sb); /* Output a trailing NUL. 
*/ if (error == 0) error = SYSCTL_OUT(req, "", 1); sbuf_delete(sb); return (error); } static int enable_lock_prof(SYSCTL_HANDLER_ARGS) { int error, v; v = lock_prof_enable; error = sysctl_handle_int(oidp, &v, v, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == lock_prof_enable) return (0); if (v == 1) lock_prof_reset(); lock_prof_enable = !!v; return (0); } static int reset_lock_prof_stats(SYSCTL_HANDLER_ARGS) { int error, v; v = 0; error = sysctl_handle_int(oidp, &v, 0, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == 0) return (0); lock_prof_reset(); return (0); } static struct lock_prof * lock_profile_lookup(struct lock_object *lo, int spin, const char *file, int line) { const char *unknown = "(unknown)"; struct lock_prof_type *type; struct lock_prof *lp; struct lphead *head; const char *p; u_int hash; p = file; if (p == NULL || *p == '\0') p = unknown; hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line; hash &= LPROF_HASH_MASK; type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; head = &type->lpt_hash[hash]; SLIST_FOREACH(lp, head, link) { if (lp->line == line && lp->file == p && lp->name == lo->lo_name) return (lp); } lp = SLIST_FIRST(&type->lpt_lpalloc); if (lp == NULL) { lock_prof_rejected++; return (lp); } SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link); lp->file = p; lp->line = line; lp->class = LOCK_CLASS(lo); lp->name = lo->lo_name; SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link); return (lp); } static struct lock_profile_object * lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file, int line) { struct lock_profile_object *l; struct lock_prof_type *type; struct lpohead *head; head = &curthread->td_lprof[spin]; LIST_FOREACH(l, head, lpo_link) if (l->lpo_obj == lo && l->lpo_file == file && l->lpo_line == line) return (l); type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; l = LIST_FIRST(&type->lpt_lpoalloc); if (l == NULL) { lock_prof_rejected++; return (NULL); } LIST_REMOVE(l, lpo_link); l->lpo_obj = lo; l->lpo_file = file; l->lpo_line = line; l->lpo_cnt = 0; LIST_INSERT_HEAD(head, l, lpo_link); return (l); } void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line) { static int lock_prof_count; struct lock_profile_object *l; int spin; if (SCHEDULER_STOPPED()) return; /* don't reset the timer when/if recursing */ if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE)) return; if (lock_prof_skipcount && (++lock_prof_count % lock_prof_skipcount) != 0) return; spin = (LOCK_CLASS(lo)->lc_flags & LC_SPINLOCK) ? 1 : 0; if (spin && lock_prof_skipspin == 1) return; critical_enter(); /* Recheck enabled now that we're in a critical section. */ if (lock_prof_enable == 0) goto out; l = lock_profile_object_lookup(lo, spin, file, line); if (l == NULL) goto out; l->lpo_cnt++; if (++l->lpo_ref > 1) goto out; l->lpo_contest_locking = contested; l->lpo_acqtime = nanoseconds(); if (waittime && (l->lpo_acqtime > waittime)) l->lpo_waittime = l->lpo_acqtime - waittime; else l->lpo_waittime = 0; out: critical_exit(); } void lock_profile_thread_exit(struct thread *td) { #ifdef INVARIANTS struct lock_profile_object *l; MPASS(curthread->td_critnest == 0); #endif /* * If lock profiling was disabled we have to wait for reset to * clear our pointers before we can exit safely. 
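
Site lookup in lock_profile_lookup() keys on the (lock name, file, line) triple; because the name and file strings are compiler-interned, their addresses identify the site and both hashing and the equality test work on pointers without any string comparison. The hash extracted into a standalone function, where HASH_SIZE mirrors LPROF_HASH_SIZE and must stay a power of two for the mask to work:

#include <stdint.h>

#define	HASH_SIZE	4096
#define	HASH_MASK	(HASH_SIZE - 1)

static unsigned
prof_hash(const char *name, const char *file, int line)
{
	uintptr_t h;

	/* Pointers are mixed as integers; they are never dereferenced. */
	h = (uintptr_t)name * 31 + (uintptr_t)file * 31 + (uintptr_t)line;
	return ((unsigned)(h & HASH_MASK));
}

int
main(void)
{
	/* Masked result always fits the table. */
	return (prof_hash("a lock", __FILE__, 42) & ~HASH_MASK);	/* 0 */
}
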
*/ lock_prof_reset_wait(); #ifdef INVARIANTS LIST_FOREACH(l, &td->td_lprof[0], lpo_link) printf("thread still holds lock acquired at %s:%d\n", l->lpo_file, l->lpo_line); LIST_FOREACH(l, &td->td_lprof[1], lpo_link) printf("thread still holds lock acquired at %s:%d\n", l->lpo_file, l->lpo_line); #endif MPASS(LIST_FIRST(&td->td_lprof[0]) == NULL); MPASS(LIST_FIRST(&td->td_lprof[1]) == NULL); } void lock_profile_release_lock(struct lock_object *lo) { struct lock_profile_object *l; struct lock_prof_type *type; struct lock_prof *lp; uint64_t curtime, holdtime; struct lpohead *head; int spin; if (SCHEDULER_STOPPED()) return; if (lo->lo_flags & LO_NOPROFILE) return; spin = (LOCK_CLASS(lo)->lc_flags & LC_SPINLOCK) ? 1 : 0; head = &curthread->td_lprof[spin]; if (LIST_FIRST(head) == NULL) return; critical_enter(); /* Recheck enabled now that we're in a critical section. */ if (lock_prof_enable == 0 && lock_prof_resetting == 1) goto out; /* * If lock profiling is not enabled we still want to remove the * lpo from our queue. */ LIST_FOREACH(l, head, lpo_link) if (l->lpo_obj == lo) break; if (l == NULL) goto out; if (--l->lpo_ref > 0) goto out; lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line); if (lp == NULL) goto release; curtime = nanoseconds(); if (curtime < l->lpo_acqtime) goto release; holdtime = curtime - l->lpo_acqtime; /* * Record if the lock has been held longer now than ever * before. */ if (holdtime > lp->cnt_max) lp->cnt_max = holdtime; if (l->lpo_waittime > lp->cnt_wait_max) lp->cnt_wait_max = l->lpo_waittime; lp->cnt_tot += holdtime; lp->cnt_wait += l->lpo_waittime; lp->cnt_contest_locking += l->lpo_contest_locking; lp->cnt_cur += l->lpo_cnt; release: LIST_REMOVE(l, lpo_link); type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link); out: critical_exit(); } static SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW, &lock_prof_skipspin, 0, "Skip profiling on spinlocks."); SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW, &lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions."); SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD, &lock_prof_rejected, 0, "Number of rejected profiling records"); SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics"); SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics"); SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, enable_lock_prof, "I", "Enable lock profiling"); #endif Index: head/sys/kern/subr_sleepqueue.c =================================================================== --- head/sys/kern/subr_sleepqueue.c (revision 344854) +++ head/sys/kern/subr_sleepqueue.c (revision 344855) @@ -1,1454 +1,1453 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Implementation of sleep queues used to hold queue of threads blocked on * a wait channel. Sleep queues are different from turnstiles in that wait * channels are not owned by anyone, so there is no priority propagation. * Sleep queues can also provide a timeout and can also be interrupted by * signals. That said, there are several similarities between the turnstile * and sleep queue implementations. (Note: turnstiles were implemented * first.) For example, both use a hash table of the same size where each * bucket is referred to as a "chain" that contains both a spin lock and * a linked list of queues. An individual queue is located by using a hash * to pick a chain, locking the chain, and then walking the chain searching * for the queue. This means that a wait channel object does not need to * embed its queue head just as locks do not embed their turnstile queue * head. Threads also carry around a sleep queue that they lend to the * wait channel when blocking. Just as in turnstiles, the queue includes * a free list of the sleep queues of other threads blocked on the same * wait channel in the case of multiple waiters. * * Some additional functionality provided by sleep queues include the * ability to set a timeout. The timeout is managed using a per-thread * callout that resumes a thread if it is asleep. A thread may also * catch signals while it is asleep (aka an interruptible sleep). The * signal code uses sleepq_abort() to interrupt a sleeping thread. Finally, * sleep queues also provide some extra assertions. One is not allowed to * mix the sleep/wakeup and cv APIs for a given wait channel. Also, one * must consistently use the same lock to synchronize with a wait channel, * though this check is currently only a warning for sleep/wakeup due to * pre-existing abuse of that API. The same lock must also be held when * awakening threads, though that is currently only enforced for condition * variables. */ #include __FBSDID("$FreeBSD$"); #include "opt_sleepqueue_profiling.h" #include "opt_ddb.h" #include "opt_sched.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* * Constants for the hash table of sleep queue chains. * SC_TABLESIZE must be a power of two for SC_MASK to work properly. 
*/ #ifndef SC_TABLESIZE #define SC_TABLESIZE 256 #endif CTASSERT(powerof2(SC_TABLESIZE)); #define SC_MASK (SC_TABLESIZE - 1) #define SC_SHIFT 8 #define SC_HASH(wc) ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \ SC_MASK) #define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] #define NR_SLEEPQS 2 /* * There are two different lists of sleep queues. Both lists are connected * via the sq_hash entries. The first list is the sleep queue chain list * that a sleep queue is on when it is attached to a wait channel. The * second list is the free list hung off of a sleep queue that is attached * to a wait channel. * * Each sleep queue also contains the wait channel it is attached to, the * list of threads blocked on that wait channel, flags specific to the * wait channel, and the lock used to synchronize with a wait channel. * The flags are used to catch mismatches between the various consumers * of the sleep queue API (e.g. sleep/wakeup and condition variables). * The lock pointer is only used when invariants are enabled for various * debugging checks. * * Locking key: * c - sleep queue chain lock */ struct sleepqueue { TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */ u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */ LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ void *sq_wchan; /* (c) Wait channel. */ int sq_type; /* (c) Queue type. */ #ifdef INVARIANTS struct lock_object *sq_lock; /* (c) Associated lock. */ #endif }; struct sleepqueue_chain { LIST_HEAD(, sleepqueue) sc_queues; /* List of sleep queues. */ struct mtx sc_lock; /* Spin lock for this chain. */ #ifdef SLEEPQUEUE_PROFILING u_int sc_depth; /* Length of sc_queues. */ u_int sc_max_depth; /* Max length of sc_queues. */ #endif } __aligned(CACHE_LINE_SIZE); #ifdef SLEEPQUEUE_PROFILING u_int sleepq_max_depth; static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling"); static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0, "sleepq chain stats"); SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth, 0, "maxmimum depth achieved of a single chain"); static void sleepq_profile(const char *wmesg); static int prof_enabled; #endif static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE]; static uma_zone_t sleepq_zone; /* * Prototypes for non-exported routines. */ static int sleepq_catch_signals(void *wchan, int pri); static int sleepq_check_signals(void); static int sleepq_check_timeout(void); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif static int sleepq_init(void *mem, int size, int flags); static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri); static void sleepq_switch(void *wchan, int pri); static void sleepq_timeout(void *arg); SDT_PROBE_DECLARE(sched, , , sleep); SDT_PROBE_DECLARE(sched, , , wakeup); /* * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes. * Note that it must happen after sleepinit() has been fully executed, so * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup. 
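
SC_HASH() is why SC_TABLESIZE must be a power of two: the channel address is folded (its shifted upper bits XORed back into it) and then masked, which only distributes evenly when the mask is all ones. The macro in standalone form, with TBLSIZE and SHIFT mirroring SC_TABLESIZE and SC_SHIFT:

#include <stdint.h>
#include <stdio.h>

#define	TBLSIZE	256			/* must stay a power of two */
#define	MASK	(TBLSIZE - 1)
#define	SHIFT	8

static unsigned
chain_hash(const void *wchan)
{
	uintptr_t a = (uintptr_t)wchan;

	/* Fold upper bits in so nearby channels spread across chains. */
	return ((unsigned)(((a >> SHIFT) ^ a) & MASK));
}

int
main(void)
{
	int x, y;

	printf("%u %u\n", chain_hash(&x), chain_hash(&y));
	return (0);
}
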
*/ #ifdef SLEEPQUEUE_PROFILING static void init_sleepqueue_profiling(void) { char chain_name[10]; struct sysctl_oid *chain_oid; u_int i; for (i = 0; i < SC_TABLESIZE; i++) { snprintf(chain_name, sizeof(chain_name), "%u", i); chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, chain_name, CTLFLAG_RD, NULL, "sleepq chain stats"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0, NULL); } } SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY, init_sleepqueue_profiling, NULL); #endif /* * Early initialization of sleep queues that is called from the sleepinit() * SYSINIT. */ void init_sleepqueues(void) { int i; for (i = 0; i < SC_TABLESIZE; i++) { LIST_INIT(&sleepq_chains[i].sc_queues); mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL, MTX_SPIN | MTX_RECURSE); } sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue), #ifdef INVARIANTS NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif thread0.td_sleepqueue = sleepq_alloc(); } /* * Get a sleep queue for a new thread. */ struct sleepqueue * sleepq_alloc(void) { return (uma_zalloc(sleepq_zone, M_WAITOK)); } /* * Free a sleep queue when a thread is destroyed. */ void sleepq_free(struct sleepqueue *sq) { uma_zfree(sleepq_zone, sq); } /* * Lock the sleep queue chain associated with the specified wait channel. */ void sleepq_lock(void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_lock_spin(&sc->sc_lock); } /* * Look up the sleep queue associated with a given wait channel in the hash * table locking the associated sleep queue chain. If no queue is found in * the table, NULL is returned. */ struct sleepqueue * sleepq_lookup(void *wchan) { struct sleepqueue_chain *sc; struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) return (sq); return (NULL); } /* * Unlock the sleep queue chain associated with a given wait channel. */ void sleepq_release(void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_unlock_spin(&sc->sc_lock); } /* * Places the current thread on the sleep queue for the specified wait * channel. If INVARIANTS is enabled, then it associates the passed in * lock with the sleepq to make sure it is held when that sleep queue is * woken up. */ void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(td->td_sleepqueue != NULL); MPASS(wchan != NULL); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); /* If this thread is not allowed to sleep, die a horrible death. */ KASSERT(td->td_no_sleeping == 0, ("%s: td %p to sleep on wchan %p with sleeping prohibited", __func__, td, wchan)); /* Look up the sleep queue associated with the wait channel 'wchan'. */ sq = sleepq_lookup(wchan); /* * If the wait channel does not already have a sleep queue, use * this thread's sleep queue. Otherwise, insert the current thread * into the sleep queue already in use by this wait channel. 
*/ if (sq == NULL) { #ifdef INVARIANTS int i; sq = td->td_sleepqueue; for (i = 0; i < NR_SLEEPQS; i++) { KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]), ("thread's sleep queue %d is not empty", i)); KASSERT(sq->sq_blockedcnt[i] == 0, ("thread's sleep queue %d count mismatches", i)); } KASSERT(LIST_EMPTY(&sq->sq_free), ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; if (sc->sc_depth > sc->sc_max_depth) { sc->sc_max_depth = sc->sc_depth; if (sc->sc_max_depth > sleepq_max_depth) sleepq_max_depth = sc->sc_max_depth; } #endif sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); MPASS((flags & SLEEPQ_TYPE) == sq->sq_type); LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash); } thread_lock(td); TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq); sq->sq_blockedcnt[queue]++; td->td_sleepqueue = NULL; td->td_sqqueue = queue; td->td_wchan = wchan; td->td_wmesg = wmesg; if (flags & SLEEPQ_INTERRUPTIBLE) { td->td_flags |= TDF_SINTR; td->td_flags &= ~TDF_SLEEPABORT; } thread_unlock(td); } /* * Sets a timeout that will remove the current thread from the specified * sleep queue after timo ticks if the thread has not already been awakened. */ void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { struct sleepqueue_chain *sc __unused; struct thread *td; sbintime_t pr1; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_sleepqueue == NULL); MPASS(wchan != NULL); if (cold && td == &thread0) panic("timed sleep before timers are working"); KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx", td->td_tid, td, (uintmax_t)td->td_sleeptimo)); thread_lock(td); callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1); thread_unlock(td); callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC | C_DIRECT_EXEC); } /* * Return the number of actual sleepers for the specified queue. */ u_int sleepq_sleepcnt(void *wchan, int queue) { struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); return (sq->sq_blockedcnt[queue]); } /* * Marks the pending sleep of the current thread as interruptible and * makes an initial check for pending signals before putting a thread * to sleep. Enters and exits with the thread lock held. Thread lock * may have transitioned from the sleepq lock to a run lock. */ static int sleepq_catch_signals(void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; struct proc *p; struct sigacts *ps; int sig, ret; ret = 0; td = curthread; p = curproc; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(wchan != NULL); if ((td->td_pflags & TDP_WAKEUP) != 0) { td->td_pflags &= ~TDP_WAKEUP; ret = EINTR; thread_lock(td); goto out; } /* * See if there are any pending signals or suspension requests for this * thread. If not, we can switch immediately. 
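
The invariant behind that branch is that every thread owns exactly one sleepqueue at all times except while blocked: the first sleeper donates its queue to the wait channel, later sleepers push theirs onto that queue's free list, and each wakeup hands one back, the last waiter reclaiming the channel's queue itself. A toy model of that accounting with deliberately simplified structures (the kernel uses LIST/TAILQ macros and per-queue arrays):

#include <assert.h>
#include <stddef.h>

struct squeue {
	struct squeue *free;		/* stack of donated spares */
};

struct sthread {
	struct squeue *sq;		/* owned queue, NULL while asleep */
};

static struct squeue *channel_q;	/* queue attached to the channel */

static void
model_sleep(struct sthread *td)
{
	if (channel_q == NULL) {
		channel_q = td->sq;	/* first sleeper donates its queue */
	} else {
		td->sq->free = channel_q->free;	/* later sleepers park theirs */
		channel_q->free = td->sq;
	}
	td->sq = NULL;
}

static void
model_wake(struct sthread *td)
{
	if (channel_q->free != NULL) {
		td->sq = channel_q->free;	/* take any spare */
		channel_q->free = td->sq->free;
	} else {
		td->sq = channel_q;	/* last waiter reclaims the queue */
		channel_q = NULL;
	}
}

int
main(void)
{
	struct squeue q1 = { NULL }, q2 = { NULL };
	struct sthread t1 = { &q1 }, t2 = { &q2 };

	model_sleep(&t1);
	model_sleep(&t2);
	model_wake(&t1);		/* gets a queue, not necessarily q1 */
	model_wake(&t2);
	assert(t1.sq != NULL && t2.sq != NULL && channel_q == NULL);
	return (0);
}

A woken thread gets back a queue, not necessarily the one it arrived with; since the queues are interchangeable this is harmless, and it is why the conservation argument works.
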
*/ thread_lock(td); if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) { thread_unlock(td); mtx_unlock_spin(&sc->sc_lock); CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", (void *)td, (long)p->p_pid, td->td_name); PROC_LOCK(p); /* * Check for suspension first. Checking for signals and then * suspending could result in a missed signal, since a signal * can be delivered while this thread is suspended. */ if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { ret = thread_suspend_check(1); MPASS(ret == 0 || ret == EINTR || ret == ERESTART); if (ret != 0) { PROC_UNLOCK(p); mtx_lock_spin(&sc->sc_lock); thread_lock(td); goto out; } } if ((td->td_flags & TDF_NEEDSIGCHK) != 0) { ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sig = cursig(td); if (sig == -1) { mtx_unlock(&ps->ps_mtx); KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); KASSERT(TD_SBDRY_INTR(td), ("lost TDF_SERESTART of TDF_SEINTR")); KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != (TDF_SEINTR | TDF_SERESTART), ("both TDF_SEINTR and TDF_SERESTART")); ret = TD_SBDRY_ERRNO(td); } else if (sig != 0) { ret = SIGISMEMBER(ps->ps_sigintr, sig) ? EINTR : ERESTART; mtx_unlock(&ps->ps_mtx); } else { mtx_unlock(&ps->ps_mtx); } } /* * Lock the per-process spinlock prior to dropping the PROC_LOCK * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and * thread_lock() are currently held in tdsendsignal(). */ PROC_SLOCK(p); mtx_lock_spin(&sc->sc_lock); PROC_UNLOCK(p); thread_lock(td); PROC_SUNLOCK(p); } if (ret == 0) { sleepq_switch(wchan, pri); return (0); } out: /* * There were pending signals and this thread is still * on the sleep queue, remove it from the sleep queue. */ if (TD_ON_SLEEPQ(td)) { sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { #ifdef INVARIANTS /* * This thread hasn't gone to sleep yet, so it * should not be swapped out. */ panic("not waking up swapper"); #endif } } mtx_unlock_spin(&sc->sc_lock); MPASS(td->td_lock != &sc->sc_lock); return (ret); } /* * Switches to another thread if we are still asleep on a sleep queue. * Returns with thread lock. */ static void sleepq_switch(void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; bool rtc_changed; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we have a sleep queue, then we've already been woken up, so * just return. */ if (td->td_sleepqueue != NULL) { mtx_unlock_spin(&sc->sc_lock); return; } /* * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. * * Do the same if the real-time clock has been adjusted since this * thread calculated its timeout based on that clock. This handles * the following race: * - The Ts thread needs to sleep until an absolute real-clock time. * It copies the global rtc_generation into curthread->td_rtcgen, * reads the RTC, and calculates a sleep duration based on that time. * See umtxq_sleep() for an example. * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes * threads that are sleeping until an absolute real-clock time. * See tc_setclock() and the POSIX specification of clock_settime(). * - Ts reaches the code below. It holds the sleepqueue chain lock, * so Tc has finished waking, so this thread must test td_rtcgen. * (The declaration of td_rtcgen refers to this comment.) 
*/ rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation; if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) { if (rtc_changed) { td->td_rtcgen = 0; } MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { #ifdef INVARIANTS /* * This thread hasn't gone to sleep yet, so it * should not be swapped out. */ panic("not waking up swapper"); #endif } mtx_unlock_spin(&sc->sc_lock); return; } #ifdef SLEEPQUEUE_PROFILING if (prof_enabled) sleepq_profile(td->td_wmesg); #endif MPASS(td->td_sleepqueue == NULL); sched_sleep(td, pri); thread_lock_set(td, &sc->sc_lock); SDT_PROBE0(sched, , , sleep); TD_SET_SLEEPING(td); mi_switch(SW_VOL | SWT_SLEEPQ, NULL); KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING")); CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); } /* * Check to see if we timed out. */ static int sleepq_check_timeout(void) { struct thread *td; int res; td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If TDF_TIMEOUT is set, we timed out. But recheck * td_sleeptimo anyway. */ res = 0; if (td->td_sleeptimo != 0) { if (td->td_sleeptimo <= sbinuptime()) res = EWOULDBLOCK; td->td_sleeptimo = 0; } if (td->td_flags & TDF_TIMEOUT) td->td_flags &= ~TDF_TIMEOUT; else /* * We ignore the situation where timeout subsystem was * unable to stop our callout. The struct thread is * type-stable, the callout will use the correct * memory when running. The checks of the * td_sleeptimo value in this function and in * sleepq_timeout() ensure that the thread does not * get spurious wakeups, even if the callout was reset * or thread reused. */ callout_stop(&td->td_slpcallout); return (res); } /* * Check to see if we were awoken by a signal. */ static int sleepq_check_signals(void) { struct thread *td; td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* We are no longer in an interruptible sleep. */ if (td->td_flags & TDF_SINTR) td->td_flags &= ~TDF_SINTR; if (td->td_flags & TDF_SLEEPABORT) { td->td_flags &= ~TDF_SLEEPABORT; return (td->td_intrval); } return (0); } /* * Block the current thread until it is awakened from its sleep queue. */ void sleepq_wait(void *wchan, int pri) { struct thread *td; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); thread_unlock(td); } /* * Block the current thread until it is awakened from its sleep queue * or it is interrupted by a signal. */ int sleepq_wait_sig(void *wchan, int pri) { int rcatch; int rval; rcatch = sleepq_catch_signals(wchan, pri); rval = sleepq_check_signals(); thread_unlock(curthread); if (rcatch) return (rcatch); return (rval); } /* * Block the current thread until it is awakened from its sleep queue * or it times out while waiting. */ int sleepq_timedwait(void *wchan, int pri) { struct thread *td; int rval; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); rval = sleepq_check_timeout(); thread_unlock(td); return (rval); } /* * Block the current thread until it is awakened from its sleep queue, * it is interrupted by a signal, or it times out waiting to be awakened. */ int sleepq_timedwait_sig(void *wchan, int pri) { int rcatch, rvalt, rvals; rcatch = sleepq_catch_signals(wchan, pri); rvalt = sleepq_check_timeout(); rvals = sleepq_check_signals(); thread_unlock(curthread); if (rcatch) return (rcatch); if (rvals) return (rvals); return (rvalt); } /* * Returns the type of sleepqueue given a waitchannel. 
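
sleepq_check_timeout() treats td_sleeptimo as a consume-once deadline: nonzero and already past means EWOULDBLOCK, and it is zeroed either way so a callout that could not be stopped cannot signal a second, spurious timeout. The decision reduced to a few lines, with the deadline as a plain counter standing in for td_sleeptimo/sbinuptime():

#include <assert.h>
#include <errno.h>
#include <stdint.h>

static int
check_timeout(int64_t *deadline, int64_t now)
{
	int res = 0;

	if (*deadline != 0) {
		if (*deadline <= now)
			res = EWOULDBLOCK;
		*deadline = 0;		/* consumed exactly once */
	}
	return (res);
}

int
main(void)
{
	int64_t dl;

	dl = 100;
	assert(check_timeout(&dl, 50) == 0);	/* woken before the deadline */
	dl = 100;
	assert(check_timeout(&dl, 100) == EWOULDBLOCK);
	assert(check_timeout(&dl, 200) == 0);	/* already consumed */
	return (0);
}
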
*/ int sleepq_type(void *wchan) { struct sleepqueue *sq; int type; MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); if (sq == NULL) { sleepq_release(wchan); return (-1); } type = sq->sq_type; sleepq_release(wchan); return (type); } /* * Removes a thread from a sleep queue and makes it * runnable. */ static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri) { struct sleepqueue_chain *sc __unused; MPASS(td != NULL); MPASS(sq->sq_wchan != NULL); MPASS(td->td_wchan == sq->sq_wchan); MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0); THREAD_LOCK_ASSERT(td, MA_OWNED); sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); SDT_PROBE2(sched, , , wakeup, td, td->td_proc); /* Remove the thread from the queue. */ sq->sq_blockedcnt[td->td_sqqueue]--; TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq); /* * Get a sleep queue for this thread. If this is the last waiter, * use the queue itself and take it out of the chain, otherwise, * remove a queue from the free list. */ if (LIST_EMPTY(&sq->sq_free)) { td->td_sleepqueue = sq; #ifdef INVARIANTS sq->sq_wchan = NULL; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth--; #endif } else td->td_sleepqueue = LIST_FIRST(&sq->sq_free); LIST_REMOVE(td->td_sleepqueue, sq_hash); td->td_wmesg = NULL; td->td_wchan = NULL; td->td_flags &= ~TDF_SINTR; CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, td->td_name); /* Adjust priority if requested. */ MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX)); if (pri != 0 && td->td_priority > pri && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, pri); /* * Note that thread td might not be sleeping if it is running * sleepq_catch_signals() on another CPU or is blocked on its * proc lock to check signals. There's no need to mark the * thread runnable in that case. */ if (TD_IS_SLEEPING(td)) { TD_CLR_SLEEPING(td); return (setrunnable(td)); } return (0); } #ifdef INVARIANTS /* * UMA zone item deallocator. */ static void sleepq_dtor(void *mem, int size, void *arg) { struct sleepqueue *sq; int i; sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { MPASS(TAILQ_EMPTY(&sq->sq_blocked[i])); MPASS(sq->sq_blockedcnt[i] == 0); } } #endif /* * UMA zone item initializer. */ static int sleepq_init(void *mem, int size, int flags) { struct sleepqueue *sq; int i; bzero(mem, size); sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { TAILQ_INIT(&sq->sq_blocked[i]); sq->sq_blockedcnt[i] = 0; } LIST_INIT(&sq->sq_free); return (0); } /* * Find the highest priority thread sleeping on a wait channel and resume it. */ int sleepq_signal(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; struct thread *td, *besttd; int wakeup_swapper; CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); /* * Find the highest priority thread on the queue. If there is a * tie, use the thread that first appears in the queue as it has * been sleeping the longest since threads are always added to * the tail of sleep queues. 
*/ besttd = TAILQ_FIRST(&sq->sq_blocked[queue]); TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) { if (td->td_priority < besttd->td_priority) besttd = td; } MPASS(besttd != NULL); thread_lock(besttd); wakeup_swapper = sleepq_resume_thread(sq, besttd, pri); thread_unlock(besttd); return (wakeup_swapper); } static bool match_any(struct thread *td __unused) { return (true); } /* * Resume all threads sleeping on a specified wait channel. */ int sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); return (sleepq_remove_matching(sq, queue, match_any, pri)); } /* * Resume threads on the sleep queue that match the given predicate. */ int sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri) { struct thread *td, *tdn; int wakeup_swapper; /* * The last thread will be given ownership of sq and may * re-enqueue itself before sleepq_resume_thread() returns, * so we must cache the "next" queue item at the beginning * of the final iteration. */ wakeup_swapper = 0; TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); if (matches(td)) wakeup_swapper |= sleepq_resume_thread(sq, td, pri); thread_unlock(td); } return (wakeup_swapper); } /* * Time sleeping threads out. When the timeout expires, the thread is * removed from the sleep queue and made runnable if it is still asleep. */ static void sleepq_timeout(void *arg) { struct sleepqueue_chain *sc __unused; struct sleepqueue *sq; struct thread *td; void *wchan; int wakeup_swapper; td = arg; wakeup_swapper = 0; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); thread_lock(td); if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) { /* * The thread does not want a timeout (yet). */ } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { /* * See if the thread is asleep and get the wait * channel if it is. */ wchan = td->td_wchan; sc = SC_LOOKUP(wchan); THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); sq = sleepq_lookup(wchan); MPASS(sq != NULL); td->td_flags |= TDF_TIMEOUT; wakeup_swapper = sleepq_resume_thread(sq, td, 0); } else if (TD_ON_SLEEPQ(td)) { /* * If the thread is on the SLEEPQ but isn't sleeping * yet, it can either be on another CPU in between * sleepq_add() and one of the sleepq_*wait*() * routines or it can be in sleepq_catch_signals(). */ td->td_flags |= TDF_TIMEOUT; } thread_unlock(td); if (wakeup_swapper) kick_proc0(); } /* * Resumes a specific thread from the sleep queue associated with a specific * wait channel if it is on that queue. */ void sleepq_remove(struct thread *td, void *wchan) { struct sleepqueue *sq; int wakeup_swapper; /* * Look up the sleep queue for this wait channel, then re-check * that the thread is asleep on that channel, if it is not, then * bail. */ MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); /* * We can not lock the thread here as it may be sleeping on a * different sleepq. However, holding the sleepq lock for this * wchan can guarantee that we do not miss a wakeup for this * channel. The asserts below will catch any false positives. 
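
The strict less-than in that scan does quiet work: combined with tail insertion in sleepq_add(), equal-priority threads are woken in FIFO order, so the longest sleeper wins ties. The selection extracted into a standalone list walk (lower numeric value means higher priority, as in the kernel; the struct is a stand-in):

#include <stddef.h>

struct mthread {
	int pri;			/* lower value = higher priority */
	struct mthread *next;
};

static struct mthread *
pick_best(struct mthread *head)
{
	struct mthread *td, *best = head;

	for (td = head; td != NULL; td = td->next)
		if (td->pri < best->pri)	/* ties keep the earlier entry */
			best = td;
	return (best);
}

int
main(void)
{
	struct mthread c = { 80, NULL }, b = { 100, &c }, a = { 80, &b };

	return (pick_best(&a) == &a ? 0 : 1);	/* head wins the 80/80 tie */
}
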
*/ if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) { sleepq_release(wchan); return; } /* Thread is asleep on sleep queue sq, so wake it up. */ thread_lock(td); MPASS(sq != NULL); MPASS(td->td_wchan == wchan); wakeup_swapper = sleepq_resume_thread(sq, td, 0); thread_unlock(td); sleepq_release(wchan); if (wakeup_swapper) kick_proc0(); } /* * Abort a thread as if an interrupt had occurred. Only abort * interruptible waits (unfortunately it isn't safe to abort others). */ int sleepq_abort(struct thread *td, int intrval) { struct sleepqueue *sq; void *wchan; THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_flags & TDF_SINTR); MPASS(intrval == EINTR || intrval == ERESTART); /* * If the TDF_TIMEOUT flag is set, just leave. A * timeout is scheduled anyhow. */ if (td->td_flags & TDF_TIMEOUT) return (0); CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); td->td_intrval = intrval; td->td_flags |= TDF_SLEEPABORT; /* * If the thread has not slept yet it will find the signal in * sleepq_catch_signals() and call sleepq_resume_thread. Otherwise * we have to do it here. */ if (!TD_IS_SLEEPING(td)) return (0); wchan = td->td_wchan; MPASS(wchan != NULL); sq = sleepq_lookup(wchan); MPASS(sq != NULL); /* Thread is asleep on sleep queue sq, so wake it up. */ return (sleepq_resume_thread(sq, td, 0)); } void sleepq_chains_remove_matching(bool (*matches)(struct thread *)) { struct sleepqueue_chain *sc; struct sleepqueue *sq, *sq1; int i, wakeup_swapper; wakeup_swapper = 0; for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) { if (LIST_EMPTY(&sc->sc_queues)) { continue; } mtx_lock_spin(&sc->sc_lock); LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) { for (i = 0; i < NR_SLEEPQS; ++i) { wakeup_swapper |= sleepq_remove_matching(sq, i, matches, 0); } } mtx_unlock_spin(&sc->sc_lock); } if (wakeup_swapper) { kick_proc0(); } } /* * Prints the stacks of all threads presently sleeping on wchan/queue to * the sbuf sb. Sets count_stacks_printed to the number of stacks actually * printed. Typically, this will equal the number of threads sleeping on the * queue, but may be less if sb overflowed before all stacks were printed. */ #ifdef STACK int sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue, int *count_stacks_printed) { struct thread *td, *td_next; struct sleepqueue *sq; struct stack **st; struct sbuf **td_infos; int i, stack_idx, error, stacks_to_allocate; bool finished; error = 0; finished = false; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); stacks_to_allocate = 10; for (i = 0; i < 3 && !finished ; i++) { /* We cannot malloc while holding the queue's spinlock, so * we do our mallocs now, and hope it is enough. If it * isn't, we will free these, drop the lock, malloc more, * and try again, up to a point. After that point we will * give up and report ENOMEM. We also cannot write to sb * during this time since the client may have set the * SBUF_AUTOEXTEND flag on their sbuf, which could cause a * malloc as we print to it. So we defer actually printing * to sb until after we drop the spinlock. */ /* Where we will store the stacks. */ st = malloc(sizeof(struct stack *) * stacks_to_allocate, M_TEMP, M_WAITOK); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) st[stack_idx] = stack_create(M_WAITOK); /* Where we will store the td name, tid, etc. 
*/ td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate, M_TEMP, M_WAITOK); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) td_infos[stack_idx] = sbuf_new(NULL, NULL, MAXCOMLEN + sizeof(struct thread *) * 2 + 40, SBUF_FIXEDLEN); sleepq_lock(wchan); sq = sleepq_lookup(wchan); if (sq == NULL) { /* This sleepq does not exist; exit and return ENOENT. */ error = ENOENT; finished = true; sleepq_release(wchan); goto loop_end; } stack_idx = 0; /* Save thread info */ TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, td_next) { if (stack_idx >= stacks_to_allocate) goto loop_end; /* Note the td_lock is equal to the sleepq_lock here. */ stack_save_td(st[stack_idx], td); sbuf_printf(td_infos[stack_idx], "%d: %s %p", td->td_tid, td->td_name, td); ++stack_idx; } finished = true; sleepq_release(wchan); /* Print the stacks */ for (i = 0; i < stack_idx; i++) { sbuf_finish(td_infos[i]); sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i])); stack_sbuf_print(sb, st[i]); sbuf_printf(sb, "\n"); error = sbuf_error(sb); if (error == 0) *count_stacks_printed = stack_idx; } loop_end: if (!finished) sleepq_release(wchan); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) stack_destroy(st[stack_idx]); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) sbuf_delete(td_infos[stack_idx]); free(st, M_TEMP); free(td_infos, M_TEMP); stacks_to_allocate *= 10; } if (!finished && error == 0) error = ENOMEM; return (error); } #endif #ifdef SLEEPQUEUE_PROFILING #define SLEEPQ_PROF_LOCATIONS 1024 #define SLEEPQ_SBUFSIZE 512 struct sleepq_prof { LIST_ENTRY(sleepq_prof) sp_link; const char *sp_wmesg; long sp_count; }; LIST_HEAD(sqphead, sleepq_prof); struct sqphead sleepq_prof_free; struct sqphead sleepq_hash[SC_TABLESIZE]; static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS]; static struct mtx sleepq_prof_lock; MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN); static void sleepq_profile(const char *wmesg) { struct sleepq_prof *sp; mtx_lock_spin(&sleepq_prof_lock); if (prof_enabled == 0) goto unlock; LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link) if (sp->sp_wmesg == wmesg) goto done; sp = LIST_FIRST(&sleepq_prof_free); if (sp == NULL) goto unlock; sp->sp_wmesg = wmesg; LIST_REMOVE(sp, sp_link); LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link); done: sp->sp_count++; unlock: mtx_unlock_spin(&sleepq_prof_lock); return; } static void sleepq_prof_reset(void) { struct sleepq_prof *sp; int enabled; int i; mtx_lock_spin(&sleepq_prof_lock); enabled = prof_enabled; prof_enabled = 0; for (i = 0; i < SC_TABLESIZE; i++) LIST_INIT(&sleepq_hash[i]); LIST_INIT(&sleepq_prof_free); for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) { sp = &sleepq_profent[i]; sp->sp_wmesg = NULL; sp->sp_count = 0; LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link); } prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); } static int enable_sleepq_prof(SYSCTL_HANDLER_ARGS) { int error, v; v = prof_enabled; error = sysctl_handle_int(oidp, &v, v, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == prof_enabled) return (0); if (v == 1) sleepq_prof_reset(); mtx_lock_spin(&sleepq_prof_lock); prof_enabled = !!v; mtx_unlock_spin(&sleepq_prof_lock); return (0); } static int reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { int error, v; v = 0; error = sysctl_handle_int(oidp, &v, 0, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == 0) return (0); sleepq_prof_reset(); return (0); } static int 
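
The loop above is a general pattern for snapshotting data guarded by a spin lock when the snapshot needs allocation: guess a capacity, allocate unlocked, take the lock, and on a bad guess drop everything, grow the guess tenfold, and retry a bounded number of times before giving up with ENOMEM. The pattern in miniature; lock(), unlock(), and need_items() are stand-ins for the sleepqueue chain lock and the walk over blocked threads:

#include <errno.h>
#include <stdlib.h>

static void lock(void) { }
static void unlock(void) { }
static size_t need_items(void) { return (42); }	/* only valid when locked */

static int
snapshot(int **out, size_t *nitems)
{
	size_t cap = 10;
	int attempt, *buf;

	for (attempt = 0; attempt < 3; attempt++) {
		buf = malloc(cap * sizeof(*buf));	/* never under the lock */
		if (buf == NULL)
			return (ENOMEM);
		lock();
		if (need_items() <= cap) {
			*nitems = need_items();
			/* ...fill buf from the protected structure... */
			unlock();
			*out = buf;
			return (0);
		}
		unlock();			/* guessed too small: retry */
		free(buf);
		cap *= 10;			/* same growth factor as above */
	}
	return (ENOMEM);
}

int
main(void)
{
	int *buf;
	size_t n;

	if (snapshot(&buf, &n) == 0)
		free(buf);
	return (0);
}
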
dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { struct sleepq_prof *sp; struct sbuf *sb; int enabled; int error; int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req); sbuf_printf(sb, "\nwmesg\tcount\n"); enabled = prof_enabled; mtx_lock_spin(&sleepq_prof_lock); prof_enabled = 0; mtx_unlock_spin(&sleepq_prof_lock); for (i = 0; i < SC_TABLESIZE; i++) { LIST_FOREACH(sp, &sleepq_hash[i], sp_link) { sbuf_printf(sb, "%s\t%ld\n", sp->sp_wmesg, sp->sp_count); } } mtx_lock_spin(&sleepq_prof_lock); prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, reset_sleepq_prof_stats, "I", "Reset sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling"); #endif #ifdef DDB DB_SHOW_COMMAND(sleepq, db_show_sleepqueue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; #ifdef INVARIANTS struct lock_object *lock; #endif struct thread *td; void *wchan; int i; if (!have_addr) return; /* * First, see if there is an active sleep queue for the wait channel * indicated by the address. */ wchan = (void *)addr; sc = SC_LOOKUP(wchan); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) goto found; /* * Second, see if there is an active sleep queue at the address * indicated. */ for (i = 0; i < SC_TABLESIZE; i++) LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) { if (sq == (struct sleepqueue *)addr) goto found; } db_printf("Unable to locate a sleep queue via %p\n", (void *)addr); return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); db_printf("Queue type: %d\n", sq->sq_type); #ifdef INVARIANTS if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, LOCK_CLASS(lock)->lc_name, lock->lo_name); } #endif db_printf("Blocked threads:\n"); for (i = 0; i < NR_SLEEPQS; i++) { db_printf("\nQueue[%d]:\n", i); if (TAILQ_EMPTY(&sq->sq_blocked[i])) db_printf("\tempty\n"); else TAILQ_FOREACH(td, &sq->sq_blocked[i], td_slpq) { db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); } db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]); } } /* Alias 'show sleepqueue' to 'show sleepq'. */ DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue); #endif Index: head/sys/kern/subr_smp.c =================================================================== --- head/sys/kern/subr_smp.c (revision 344854) +++ head/sys/kern/subr_smp.c (revision 344855) @@ -1,1174 +1,1173 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001, John Baldwin . - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module holds the global variables and machine independent functions * used for the kernel SMP support. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_sched.h" #ifdef SMP MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data"); volatile cpuset_t stopped_cpus; volatile cpuset_t started_cpus; volatile cpuset_t suspended_cpus; cpuset_t hlt_cpus_mask; cpuset_t logical_cpus_mask; void (*cpustop_restartfunc)(void); #endif static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS); /* This is used in modules that need to work in both SMP and UP. */ cpuset_t all_cpus; int mp_ncpus; /* export this for libkvm consumers. */ int mp_maxcpus = MAXCPU; volatile int smp_started; u_int mp_maxid; static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL, "Kernel SMP"); SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, "Max CPU ID."); SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus, 0, "Max number of CPUs that the system was compiled for."); SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD|CTLTYPE_INT|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode"); int smp_disabled = 0; /* has smp been disabled? */ SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD, &smp_disabled, 0, "SMP has been disabled from the loader"); int smp_cpus = 1; /* how many CPUs are running */ SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0, "Number of CPUs online"); int smp_threads_per_core = 1; /* how many SMT threads are running per core */ SYSCTL_INT(_kern_smp, OID_AUTO, threads_per_core, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_threads_per_core, 0, "Number of SMT threads online per core"); int mp_ncores = -1; /* how many physical cores running */ SYSCTL_INT(_kern_smp, OID_AUTO, cores, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncores, 0, "Number of physical cores online"); int smp_topology = 0; /* Which topology we're using. */ SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0, "Topology override setting; 0 is default provided by hardware."); #ifdef SMP /* Enable forwarding of a signal to a process running on a different CPU */ static int forward_signal_enabled = 1; SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW, &forward_signal_enabled, 0, "Forwarding of a signal to a process on a different CPU"); /* Variables needed for SMP rendezvous.
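 *
 * As a reading aid (this is only a summary of the code below, not extra
 * synchronization), the smp_rv_waiters[] slots act as barrier phases:
 *
 *	smp_rv_waiters[0]	all CPUs have entered the handler and can
 *				safely read the smp_rv_* parameters
 *	smp_rv_waiters[1]	all CPUs have completed the setup function
 *	smp_rv_waiters[2]	all CPUs have completed the action function
 *	smp_rv_waiters[3]	this CPU is completely done touching the
 *				smp_rv_* variables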
*/ static volatile int smp_rv_ncpus; static void (*volatile smp_rv_setup_func)(void *arg); static void (*volatile smp_rv_action_func)(void *arg); static void (*volatile smp_rv_teardown_func)(void *arg); static void *volatile smp_rv_func_arg; static volatile int smp_rv_waiters[4]; /* * Shared mutex to restrict busywaits between smp_rendezvous() and * smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these * functions trigger at once and cause multiple CPUs to busywait with * interrupts disabled. */ struct mtx smp_ipi_mtx; /* * Let the MD SMP code initialize mp_maxid very early if it can. */ static void mp_setmaxid(void *dummy) { cpu_mp_setmaxid(); KASSERT(mp_ncpus >= 1, ("%s: CPU count < 1", __func__)); KASSERT(mp_ncpus > 1 || mp_maxid == 0, ("%s: one CPU but mp_maxid is not zero", __func__)); KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL); /* * Call the MD SMP initialization code. */ static void mp_start(void *dummy) { mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN); /* Probe for MP hardware. */ if (smp_disabled != 0 || cpu_mp_probe() == 0) { mp_ncores = 1; mp_ncpus = 1; CPU_SETOF(PCPU_GET(cpuid), &all_cpus); return; } cpu_mp_start(); printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n", mp_ncpus); /* Provide a default for most architectures that don't have SMT/HTT. */ if (mp_ncores < 0) mp_ncores = mp_ncpus; cpu_mp_announce(); } SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL); void forward_signal(struct thread *td) { int id; /* * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on * this thread, so all we need to do is poke it if it is currently * executing so that it executes ast(). */ THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("forward_signal: thread is not TDS_RUNNING")); CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc); if (!smp_started || cold || panicstr) return; if (!forward_signal_enabled) return; /* No need to IPI ourself. */ if (td == curthread) return; id = td->td_oncpu; if (id == NOCPU) return; ipi_cpu(id, IPI_AST); } /* * When called, the executing CPU will send an IPI to all other CPUs * requesting that they halt execution. * * Usually (but not necessarily) called with 'other_cpus' as its arg. * * - Signals all CPUs in map to stop. * - Waits for each to stop. * * Returns: * -1: error * 0: NA * 1: ok * */ #if defined(__amd64__) || defined(__i386__) #define X86 1 #else #define X86 0 #endif static int generic_stop_cpus(cpuset_t map, u_int type) { #ifdef KTR char cpusetbuf[CPUSETBUFSIZ]; #endif static volatile u_int stopping_cpu = NOCPU; int i; volatile cpuset_t *cpus; KASSERT( type == IPI_STOP || type == IPI_STOP_HARD #if X86 || type == IPI_SUSPEND #endif , ("%s: invalid stop type", __func__)); if (!smp_started) return (0); CTR2(KTR_SMP, "stop_cpus(%s) with %u type", cpusetobj_strprint(cpusetbuf, &map), type); #if X86 /* * When suspending, ensure there are no IPIs in progress. * IPIs that have been issued, but not yet delivered (e.g. * not pending on a vCPU when running under virtualization) * will be lost, violating FreeBSD's assumption of reliable * IPI delivery.
*/ if (type == IPI_SUSPEND) mtx_lock_spin(&smp_ipi_mtx); #endif #if X86 if (!nmi_is_broadcast || nmi_kdb_lock == 0) { #endif if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, PCPU_GET(cpuid)) == 0) while (stopping_cpu != NOCPU) cpu_spinwait(); /* spin */ /* send the stop IPI to all CPUs in map */ ipi_selected(map, type); #if X86 } #endif #if X86 if (type == IPI_SUSPEND) cpus = &suspended_cpus; else #endif cpus = &stopped_cpus; i = 0; while (!CPU_SUBSET(cpus, &map)) { /* spin */ cpu_spinwait(); i++; if (i == 100000000) { printf("timeout stopping cpus\n"); break; } } #if X86 if (type == IPI_SUSPEND) mtx_unlock_spin(&smp_ipi_mtx); #endif stopping_cpu = NOCPU; return (1); } int stop_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP)); } int stop_cpus_hard(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP_HARD)); } #if X86 int suspend_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_SUSPEND)); } #endif /* * Called by a CPU to restart stopped CPUs. * * Usually (but not necessarily) called with 'stopped_cpus' as its arg. * * - Signals all CPUs in map to restart. * - Waits for each to restart. * * Returns: * -1: error * 0: NA * 1: ok */ static int generic_restart_cpus(cpuset_t map, u_int type) { #ifdef KTR char cpusetbuf[CPUSETBUFSIZ]; #endif volatile cpuset_t *cpus; KASSERT(type == IPI_STOP || type == IPI_STOP_HARD #if X86 || type == IPI_SUSPEND #endif , ("%s: invalid stop type", __func__)); if (!smp_started) return (0); CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); #if X86 if (type == IPI_SUSPEND) cpus = &resuming_cpus; else #endif cpus = &stopped_cpus; /* signal other cpus to restart */ #if X86 if (type == IPI_SUSPEND) CPU_COPY_STORE_REL(&map, &toresume_cpus); else #endif CPU_COPY_STORE_REL(&map, &started_cpus); #if X86 if (!nmi_is_broadcast || nmi_kdb_lock == 0) { #endif /* wait for each to clear its bit */ while (CPU_OVERLAP(cpus, &map)) cpu_spinwait(); #if X86 } #endif return (1); } int restart_cpus(cpuset_t map) { return (generic_restart_cpus(map, IPI_STOP)); } #if X86 int resume_cpus(cpuset_t map) { return (generic_restart_cpus(map, IPI_SUSPEND)); } #endif #undef X86 /* * All-CPU rendezvous. CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), * rendezvous again, execute the teardown function (if specified), and then * resume. * * Note that the supplied external functions _must_ be reentrant and aware * that they are running in parallel and in an unknown lock context. */ void smp_rendezvous_action(void) { struct thread *td; void *local_func_arg; void (*local_setup_func)(void*); void (*local_action_func)(void*); void (*local_teardown_func)(void*); #ifdef INVARIANTS int owepreempt; #endif /* Ensure we have up-to-date values. */ atomic_add_acq_int(&smp_rv_waiters[0], 1); while (smp_rv_waiters[0] < smp_rv_ncpus) cpu_spinwait(); /* Fetch rendezvous parameters after acquire barrier. */ local_func_arg = smp_rv_func_arg; local_setup_func = smp_rv_setup_func; local_action_func = smp_rv_action_func; local_teardown_func = smp_rv_teardown_func; /* * Use a nested critical section to prevent any preemptions * from occurring during a rendezvous action routine. * Specifically, if a rendezvous handler is invoked via an IPI * and the interrupted thread was in the critical_exit() * function after setting td_critnest to 0 but before * performing a deferred preemption, this routine can be * invoked with td_critnest set to 0 and td_owepreempt true. 
* In that case, a critical_exit() during the rendezvous * action would trigger a preemption which is not permitted in * a rendezvous action. To fix this, wrap all of the * rendezvous action handlers in a critical section. We * cannot use a regular critical section however as having * critical_exit() preempt from this routine would also be * problematic (the preemption must not occur before the IPI * has been acknowledged via an EOI). Instead, we * intentionally ignore td_owepreempt when leaving the * critical section. This should be harmless because we do * not permit rendezvous action routines to schedule threads, * and thus td_owepreempt should never transition from 0 to 1 * during this routine. */ td = curthread; td->td_critnest++; #ifdef INVARIANTS owepreempt = td->td_owepreempt; #endif /* * If requested, run a setup function before the main action * function. Ensure all CPUs have completed the setup * function before moving on to the action function. */ if (local_setup_func != smp_no_rendezvous_barrier) { if (smp_rv_setup_func != NULL) smp_rv_setup_func(smp_rv_func_arg); atomic_add_int(&smp_rv_waiters[1], 1); while (smp_rv_waiters[1] < smp_rv_ncpus) cpu_spinwait(); } if (local_action_func != NULL) local_action_func(local_func_arg); if (local_teardown_func != smp_no_rendezvous_barrier) { /* * Signal that the main action has been completed. If a * full exit rendezvous is requested, then all CPUs will * wait here until all CPUs have finished the main action. */ atomic_add_int(&smp_rv_waiters[2], 1); while (smp_rv_waiters[2] < smp_rv_ncpus) cpu_spinwait(); if (local_teardown_func != NULL) local_teardown_func(local_func_arg); } /* * Signal that the rendezvous is fully completed by this CPU. * This means that no member of smp_rv_* pseudo-structure will be * accessed by this target CPU after this point; in particular, * memory pointed to by smp_rv_func_arg. * * The release semantic ensures that all accesses performed by * the current CPU are visible when smp_rendezvous_cpus() * returns, by synchronizing with the * atomic_load_acq_int(&smp_rv_waiters[3]). */ atomic_add_rel_int(&smp_rv_waiters[3], 1); td->td_critnest--; KASSERT(owepreempt == td->td_owepreempt, ("rendezvous action changed td_owepreempt")); } void smp_rendezvous_cpus(cpuset_t map, void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { int curcpumap, i, ncpus = 0; /* See the comments in the !SMP case. */ if (!smp_started) { spinlock_enter(); if (setup_func != NULL) setup_func(arg); if (action_func != NULL) action_func(arg); if (teardown_func != NULL) teardown_func(arg); spinlock_exit(); return; } CPU_FOREACH(i) { if (CPU_ISSET(i, &map)) ncpus++; } if (ncpus == 0) panic("ncpus is 0 with non-zero map"); mtx_lock_spin(&smp_ipi_mtx); /* Pass rendezvous parameters via global variables. */ smp_rv_ncpus = ncpus; smp_rv_setup_func = setup_func; smp_rv_action_func = action_func; smp_rv_teardown_func = teardown_func; smp_rv_func_arg = arg; smp_rv_waiters[1] = 0; smp_rv_waiters[2] = 0; smp_rv_waiters[3] = 0; atomic_store_rel_int(&smp_rv_waiters[0], 0); /* * Signal other processors, which will enter the IPI with * interrupts off. */ curcpumap = CPU_ISSET(curcpu, &map); CPU_CLR(curcpu, &map); ipi_selected(map, IPI_RENDEZVOUS); /* Check if the current CPU is in the map */ if (curcpumap != 0) smp_rendezvous_action(); /* * Ensure that the master CPU waits for all the other * CPUs to finish the rendezvous, so that smp_rv_* * pseudo-structure and the arg are guaranteed to not * be in use.
* * Load acquire synchronizes with the release add in * smp_rendezvous_action(), which ensures that our caller sees * all memory actions done by the called functions on other * CPUs. */ while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus) cpu_spinwait(); mtx_unlock_spin(&smp_ipi_mtx); } void smp_rendezvous(void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg); } static struct cpu_group group[MAXCPU * MAX_CACHE_LEVELS + 1]; struct cpu_group * smp_topo(void) { char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; struct cpu_group *top; /* * Check for a fake topology request for debugging purposes. */ switch (smp_topology) { case 1: /* Dual core with no sharing. */ top = smp_topo_1level(CG_SHARE_NONE, 2, 0); break; case 2: /* No topology, all cpus are equal. */ top = smp_topo_none(); break; case 3: /* Dual core with shared L2. */ top = smp_topo_1level(CG_SHARE_L2, 2, 0); break; case 4: /* quad core, shared l3 among each package, private l2. */ top = smp_topo_1level(CG_SHARE_L3, 4, 0); break; case 5: /* quad core, 2 dualcore parts on each package share l2. */ top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0); break; case 6: /* Single-core 2xHTT */ top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT); break; case 7: /* quad core with a shared l3, 8 threads sharing L2. */ top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8, CG_FLAG_SMT); break; default: /* Default, ask the system what it wants. */ top = cpu_topo(); break; } /* * Verify the returned topology. */ if (top->cg_count != mp_ncpus) panic("Built bad topology at %p. CPU count %d != %d", top, top->cg_count, mp_ncpus); if (CPU_CMP(&top->cg_mask, &all_cpus)) panic("Built bad topology at %p. CPU mask (%s) != (%s)", top, cpusetobj_strprint(cpusetbuf, &top->cg_mask), cpusetobj_strprint(cpusetbuf2, &all_cpus)); /* * Collapse nonsense levels that may be created out of convenience by * the MD layers. They cause extra work in the search functions. */ while (top->cg_children == 1) { top = &top->cg_child[0]; top->cg_parent = NULL; } return (top); } struct cpu_group * smp_topo_alloc(u_int count) { static u_int index; u_int curr; curr = index; index += count; return (&group[curr]); } struct cpu_group * smp_topo_none(void) { struct cpu_group *top; top = &group[0]; top->cg_parent = NULL; top->cg_child = NULL; top->cg_mask = all_cpus; top->cg_count = mp_ncpus; top->cg_children = 0; top->cg_level = CG_SHARE_NONE; top->cg_flags = 0; return (top); } static int smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, int count, int flags, int start) { char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; cpuset_t mask; int i; CPU_ZERO(&mask); for (i = 0; i < count; i++, start++) CPU_SET(start, &mask); child->cg_parent = parent; child->cg_child = NULL; child->cg_children = 0; child->cg_level = share; child->cg_count = count; child->cg_flags = flags; child->cg_mask = mask; parent->cg_children++; for (; parent != NULL; parent = parent->cg_parent) { if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask)) panic("Duplicate children in %p. 
mask (%s) child (%s)", parent, cpusetobj_strprint(cpusetbuf, &parent->cg_mask), cpusetobj_strprint(cpusetbuf2, &child->cg_mask)); CPU_OR(&parent->cg_mask, &child->cg_mask); parent->cg_count += child->cg_count; } return (start); } struct cpu_group * smp_topo_1level(int share, int count, int flags) { struct cpu_group *child; struct cpu_group *top; int packages; int cpu; int i; cpu = 0; top = &group[0]; packages = mp_ncpus / count; top->cg_child = child = &group[1]; top->cg_level = CG_SHARE_NONE; for (i = 0; i < packages; i++, child++) cpu = smp_topo_addleaf(top, child, share, count, flags, cpu); return (top); } struct cpu_group * smp_topo_2level(int l2share, int l2count, int l1share, int l1count, int l1flags) { struct cpu_group *top; struct cpu_group *l1g; struct cpu_group *l2g; int cpu; int i; int j; cpu = 0; top = &group[0]; l2g = &group[1]; top->cg_child = l2g; top->cg_level = CG_SHARE_NONE; top->cg_children = mp_ncpus / (l2count * l1count); l1g = l2g + top->cg_children; for (i = 0; i < top->cg_children; i++, l2g++) { l2g->cg_parent = top; l2g->cg_child = l1g; l2g->cg_level = l2share; for (j = 0; j < l2count; j++, l1g++) cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count, l1flags, cpu); } return (top); } struct cpu_group * smp_topo_find(struct cpu_group *top, int cpu) { struct cpu_group *cg; cpuset_t mask; int children; int i; CPU_SETOF(cpu, &mask); cg = top; for (;;) { if (!CPU_OVERLAP(&cg->cg_mask, &mask)) return (NULL); if (cg->cg_children == 0) return (cg); children = cg->cg_children; for (i = 0, cg = cg->cg_child; i < children; cg++, i++) if (CPU_OVERLAP(&cg->cg_mask, &mask)) break; } return (NULL); } #else /* !SMP */ void smp_rendezvous_cpus(cpuset_t map, void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), void *arg) { /* * In the !SMP case we just need to ensure the same initial conditions * as the SMP case. */ spinlock_enter(); if (setup_func != NULL) setup_func(arg); if (action_func != NULL) action_func(arg); if (teardown_func != NULL) teardown_func(arg); spinlock_exit(); } void smp_rendezvous(void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), void *arg) { smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg); } /* * Provide dummy SMP support for UP kernels. Modules that need to use SMP * APIs will still work using this dummy support. */ static void mp_setvariables_for_up(void *dummy) { mp_ncpus = 1; mp_ncores = 1; mp_maxid = PCPU_GET(cpuid); CPU_SETOF(mp_maxid, &all_cpus); KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero")); } SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setvariables_for_up, NULL); #endif /* SMP */ void smp_no_rendezvous_barrier(void *dummy) { #ifdef SMP KASSERT((!smp_started),("smp_no_rendezvous called and smp is started")); #endif } /* * Wait for specified idle threads to switch once. This ensures that even * preempted threads have cycled through the switch function once, * exiting their codepaths. This allows us to change global pointers * with no other synchronization. 
*/ int quiesce_cpus(cpuset_t map, const char *wmesg, int prio) { struct pcpu *pcpu; u_int gen[MAXCPU]; int error; int cpu; error = 0; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) continue; pcpu = pcpu_find(cpu); gen[cpu] = pcpu->pc_idlethread->td_generation; } for (cpu = 0; cpu <= mp_maxid; cpu++) { if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) continue; pcpu = pcpu_find(cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); while (gen[cpu] == pcpu->pc_idlethread->td_generation) { error = tsleep(quiesce_cpus, prio, wmesg, 1); if (error != EWOULDBLOCK) goto out; error = 0; } } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (error); } int quiesce_all_cpus(const char *wmesg, int prio) { return quiesce_cpus(all_cpus, wmesg, prio); } /* Extra care is taken with this sysctl because the data type is volatile */ static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS) { int error, active; active = smp_started; error = SYSCTL_OUT(req, &active, sizeof(active)); return (error); } #ifdef SMP void topo_init_node(struct topo_node *node) { bzero(node, sizeof(*node)); TAILQ_INIT(&node->children); } void topo_init_root(struct topo_node *root) { topo_init_node(root); root->type = TOPO_TYPE_SYSTEM; } /* * Add a child node with the given ID under the given parent. * Do nothing if there is already a child with that ID. */ struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid, topo_node_type type, uintptr_t subtype) { struct topo_node *node; TAILQ_FOREACH_REVERSE(node, &parent->children, topo_children, siblings) { if (node->hwid == hwid && node->type == type && node->subtype == subtype) { return (node); } } node = malloc(sizeof(*node), M_TOPO, M_WAITOK); topo_init_node(node); node->parent = parent; node->hwid = hwid; node->type = type; node->subtype = subtype; TAILQ_INSERT_TAIL(&parent->children, node, siblings); parent->nchildren++; return (node); } /* * Find a child node with the given ID under the given parent. */ struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid, topo_node_type type, uintptr_t subtype) { struct topo_node *node; TAILQ_FOREACH(node, &parent->children, siblings) { if (node->hwid == hwid && node->type == type && node->subtype == subtype) { return (node); } } return (NULL); } /* * Given a node, change the order of its parent's child nodes such * that the node becomes the first child while preserving the cyclic * order of the children. In other words, the given node is promoted * by rotation. */ void topo_promote_child(struct topo_node *child) { struct topo_node *next; struct topo_node *node; struct topo_node *parent; parent = child->parent; next = TAILQ_NEXT(child, siblings); TAILQ_REMOVE(&parent->children, child, siblings); TAILQ_INSERT_HEAD(&parent->children, child, siblings); while (next != NULL) { node = next; next = TAILQ_NEXT(node, siblings); TAILQ_REMOVE(&parent->children, node, siblings); TAILQ_INSERT_AFTER(&parent->children, child, node, siblings); child = node; } } /* * Iterate to the next node in the depth-first search (traversal) of * the topology tree.
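 *
 * For illustration, a full pre-order walk that counts the PU (logical
 * processor) nodes under a given root might look like this sketch:
 *
 *	count = 0;
 *	for (node = topo_next_node(root, root); node != NULL;
 *	    node = topo_next_node(root, node)) {
 *		if (node->type == TOPO_TYPE_PU)
 *			count++;
 *	}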
*/ struct topo_node * topo_next_node(struct topo_node *top, struct topo_node *node) { struct topo_node *next; if ((next = TAILQ_FIRST(&node->children)) != NULL) return (next); if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); while (node != top && (node = node->parent) != top) if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); return (NULL); } /* * Iterate to the next node in the depth-first search of the topology tree, * but without descending below the current node. */ struct topo_node * topo_next_nonchild_node(struct topo_node *top, struct topo_node *node) { struct topo_node *next; if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); while (node != top && (node = node->parent) != top) if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); return (NULL); } /* * Assign the given ID to the given topology node that represents a logical * processor. */ void topo_set_pu_id(struct topo_node *node, cpuid_t id) { KASSERT(node->type == TOPO_TYPE_PU, ("topo_set_pu_id: wrong node type: %u", node->type)); KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0, ("topo_set_pu_id: cpuset already not empty")); node->id = id; CPU_SET(id, &node->cpuset); node->cpu_count = 1; node->subtype = 1; while ((node = node->parent) != NULL) { KASSERT(!CPU_ISSET(id, &node->cpuset), ("logical ID %u is already set in node %p", id, node)); CPU_SET(id, &node->cpuset); node->cpu_count++; } } static struct topology_spec { topo_node_type type; bool match_subtype; uintptr_t subtype; } topology_level_table[TOPO_LEVEL_COUNT] = { [TOPO_LEVEL_PKG] = { .type = TOPO_TYPE_PKG, }, [TOPO_LEVEL_GROUP] = { .type = TOPO_TYPE_GROUP, }, [TOPO_LEVEL_CACHEGROUP] = { .type = TOPO_TYPE_CACHE, .match_subtype = true, .subtype = CG_SHARE_L3, }, [TOPO_LEVEL_CORE] = { .type = TOPO_TYPE_CORE, }, [TOPO_LEVEL_THREAD] = { .type = TOPO_TYPE_PU, }, }; static bool topo_analyze_table(struct topo_node *root, int all, enum topo_level level, struct topo_analysis *results) { struct topology_spec *spec; struct topo_node *node; int count; if (level >= TOPO_LEVEL_COUNT) return (true); spec = &topology_level_table[level]; count = 0; node = topo_next_node(root, root); while (node != NULL) { if (node->type != spec->type || (spec->match_subtype && node->subtype != spec->subtype)) { node = topo_next_node(root, node); continue; } if (!all && CPU_EMPTY(&node->cpuset)) { node = topo_next_nonchild_node(root, node); continue; } count++; if (!topo_analyze_table(node, all, level + 1, results)) return (false); node = topo_next_nonchild_node(root, node); } /* No explicit subgroups is essentially one subgroup. */ if (count == 0) { count = 1; if (!topo_analyze_table(root, all, level + 1, results)) return (false); } if (results->entities[level] == -1) results->entities[level] = count; else if (results->entities[level] != count) return (false); return (true); } /* * Check if the topology is uniform, that is, each package has the same number * of cores in it and each core has the same number of threads (logical * processors) in it. If so, calculate the number of packages, the number of * groups per package, the number of cachegroups per group, and the number of * logical processors per cachegroup. 'all' parameter tells whether to include * administratively disabled logical processors into the analysis. 
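 *
 * A minimal caller sketch (assuming a topology root built elsewhere):
 *
 *	struct topo_analysis ta;
 *
 *	if (topo_analyze(root, 1, &ta))
 *		printf("uniform topology, %d package(s)\n",
 *		    ta.entities[TOPO_LEVEL_PKG]);
 *	else
 *		printf("topology is not uniform\n");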
*/ int topo_analyze(struct topo_node *topo_root, int all, struct topo_analysis *results) { results->entities[TOPO_LEVEL_PKG] = -1; results->entities[TOPO_LEVEL_CORE] = -1; results->entities[TOPO_LEVEL_THREAD] = -1; results->entities[TOPO_LEVEL_GROUP] = -1; results->entities[TOPO_LEVEL_CACHEGROUP] = -1; if (!topo_analyze_table(topo_root, all, TOPO_LEVEL_PKG, results)) return (0); KASSERT(results->entities[TOPO_LEVEL_PKG] > 0, ("bug in topology or analysis")); return (1); } #endif /* SMP */ Index: head/sys/sys/_rwlock.h =================================================================== --- head/sys/sys/_rwlock.h (revision 344854) +++ head/sys/sys/_rwlock.h (revision 344855) @@ -1,64 +1,63 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS__RWLOCK_H_ #define _SYS__RWLOCK_H_ #include /* * Reader/writer lock. * * All reader/writer lock implementations must always have a member * called rw_lock. Other locking primitive structures are not allowed to * use this name for their members. * If this rule needs to change, the bits in the reader/writer lock * implementation must be modified appropriately. */ struct rwlock { struct lock_object lock_object; volatile uintptr_t rw_lock; }; /* * Members of struct rwlock_padalign must mirror members of struct rwlock. * rwlock_padalign rwlocks can use the rwlock(9) API transparently without * modification. * Pad-aligned rwlocks used within structures should generally be the * first member of the struct. Otherwise, the compiler can generate * additional padding for the struct to keep a correct alignment for * the rwlock. */ struct rwlock_padalign { struct lock_object lock_object; volatile uintptr_t rw_lock; } __aligned(CACHE_LINE_SIZE); #endif /* !_SYS__RWLOCK_H_ */ Index: head/sys/sys/refcount.h =================================================================== --- head/sys/sys/refcount.h (revision 344854) +++ head/sys/sys/refcount.h (revision 344855) @@ -1,113 +1,112 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYS_REFCOUNT_H__ #define __SYS_REFCOUNT_H__ #include #include #ifdef _KERNEL #include #else #define KASSERT(exp, msg) /* */ #endif static __inline void refcount_init(volatile u_int *count, u_int value) { *count = value; } static __inline void refcount_acquire(volatile u_int *count) { KASSERT(*count < UINT_MAX, ("refcount %p overflowed", count)); atomic_add_int(count, 1); } static __inline int refcount_release(volatile u_int *count) { u_int old; atomic_thread_fence_rel(); old = atomic_fetchadd_int(count, -1); KASSERT(old > 0, ("refcount %p is zero", count)); if (old > 1) return (0); /* * Last reference. Signal the user to call the destructor. * * Ensure that the destructor sees all updates. The fence_rel * at the start of the function synchronizes with this fence. */ atomic_thread_fence_acq(); return (1); } /* * This function returns non-zero if the refcount was * incremented. Otherwise zero is returned. */ static __inline __result_use_check int refcount_acquire_if_not_zero(volatile u_int *count) { u_int old; old = *count; for (;;) { KASSERT(old < UINT_MAX, ("refcount %p overflowed", count)); if (old == 0) return (0); if (atomic_fcmpset_int(count, &old, old + 1)) return (1); } } static __inline __result_use_check int refcount_release_if_not_last(volatile u_int *count) { u_int old; old = *count; for (;;) { KASSERT(old > 0, ("refcount %p is zero", count)); if (old == 1) return (0); if (atomic_fcmpset_int(count, &old, old - 1)) return (1); } } #endif /* ! __SYS_REFCOUNT_H__ */ Index: head/sys/sys/rwlock.h =================================================================== --- head/sys/sys/rwlock.h (revision 344854) +++ head/sys/sys/rwlock.h (revision 344855) @@ -1,300 +1,299 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_RWLOCK_H_ #define _SYS_RWLOCK_H_ #include #include #include #include #ifdef _KERNEL #include #include #endif /* * The rw_lock field consists of several fields. The low bit indicates * if the lock is locked with a read (shared) or write (exclusive) lock. * A value of 0 indicates a write lock, and a value of 1 indicates a read * lock. Bit 1 is a boolean indicating if there are any threads waiting * for a read lock. Bit 2 is a boolean indicating if there are any threads * waiting for a write lock. The rest of the variable's definition is * dependent on the value of the first bit. For a write lock, it is a * pointer to the thread holding the lock, similar to the mtx_lock field of * mutexes. For read locks, it is a count of read locks that are held. * * When the lock is not locked by any thread, it is encoded as a read lock * with zero waiters. */ #define RW_LOCK_READ 0x01 #define RW_LOCK_READ_WAITERS 0x02 #define RW_LOCK_WRITE_WAITERS 0x04 #define RW_LOCK_WRITE_SPINNER 0x08 #define RW_LOCK_WRITER_RECURSED 0x10 #define RW_LOCK_FLAGMASK \ (RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS | \ RW_LOCK_WRITE_SPINNER | RW_LOCK_WRITER_RECURSED) #define RW_LOCK_WAITERS (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #define RW_OWNER(x) ((x) & ~RW_LOCK_FLAGMASK) #define RW_READERS_SHIFT 5 #define RW_READERS(x) (RW_OWNER((x)) >> RW_READERS_SHIFT) #define RW_READERS_LOCK(x) ((x) << RW_READERS_SHIFT | RW_LOCK_READ) #define RW_ONE_READER (1 << RW_READERS_SHIFT) #define RW_UNLOCKED RW_READERS_LOCK(0) #define RW_DESTROYED (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #ifdef _KERNEL #define rw_recurse lock_object.lo_data #define RW_READ_VALUE(x) ((x)->rw_lock) /* Very simple operations on rw_lock. */ /* Try to obtain a write lock once. */ #define _rw_write_lock(rw, tid) \ atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid)) #define _rw_write_lock_fetch(rw, vp, tid) \ atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid)) /* Release a write lock quickly if there are no waiters. */ #define _rw_write_unlock(rw, tid) \ atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED) #define _rw_write_unlock_fetch(rw, tid) \ atomic_fcmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED) /* * Full lock operations that are suitable to be inlined in non-debug * kernels. If the lock cannot be acquired or released trivially then * the work is deferred to another function. */ /* Acquire a write lock. */ #define __rw_wlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ uintptr_t _v = RW_UNLOCKED; \ \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || \ !_rw_write_lock_fetch((rw), &_v, _tid))) \ _rw_wlock_hard((rw), _v, (file), (line)); \ } while (0) /* Release a write lock. 
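 * The inline below attempts a single atomic compare-and-set of rw_lock
 * from the owning thread pointer back to RW_UNLOCKED; if that fails
 * (e.g. because waiter bits are set) or lock profiling is enabled, it
 * defers to _rw_wunlock_hard().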
*/ #define __rw_wunlock(rw, tid, file, line) do { \ uintptr_t _v = (uintptr_t)(tid); \ \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || \ !_rw_write_unlock_fetch((rw), &_v))) \ _rw_wunlock_hard((rw), _v, (file), (line)); \ } while (0) /* * Function prototypes. Routines that start with _ are not part of the * external API and should not be called directly. Wrapper macros should * be used instead. */ void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts); void _rw_destroy(volatile uintptr_t *c); void rw_sysinit(void *arg); int _rw_wowned(const volatile uintptr_t *c); void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line); int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line); void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); void __rw_rlock(volatile uintptr_t *c, const char *file, int line); int __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line); void _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF); void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF); int __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line); void __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); void __rw_downgrade(volatile uintptr_t *c, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line); #endif /* * Top-level macros to provide lock cookie once the actual rwlock is passed. * They will also prevent passing a malformed object to the rwlock KPI by * failing compilation as the rw_lock reserved member will not be found. 
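 *
 * A minimal usage sketch (the lock and data names are illustrative):
 *
 *	struct rwlock data_lock;
 *
 *	rw_init(&data_lock, "data lock");
 *	rw_rlock(&data_lock);
 *	... read the shared data ...
 *	rw_runlock(&data_lock);
 *	rw_wlock(&data_lock);
 *	... modify the shared data ...
 *	rw_wunlock(&data_lock);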
*/ #define rw_init(rw, n) \ _rw_init_flags(&(rw)->rw_lock, n, 0) #define rw_init_flags(rw, n, o) \ _rw_init_flags(&(rw)->rw_lock, n, o) #define rw_destroy(rw) \ _rw_destroy(&(rw)->rw_lock) #define rw_wowned(rw) \ _rw_wowned(&(rw)->rw_lock) #define _rw_wlock(rw, f, l) \ _rw_wlock_cookie(&(rw)->rw_lock, f, l) #define _rw_try_wlock(rw, f, l) \ __rw_try_wlock(&(rw)->rw_lock, f, l) #define _rw_wunlock(rw, f, l) \ _rw_wunlock_cookie(&(rw)->rw_lock, f, l) #define _rw_try_rlock(rw, f, l) \ __rw_try_rlock(&(rw)->rw_lock, f, l) #if LOCK_DEBUG > 0 #define _rw_rlock(rw, f, l) \ __rw_rlock(&(rw)->rw_lock, f, l) #define _rw_runlock(rw, f, l) \ _rw_runlock_cookie(&(rw)->rw_lock, f, l) #else #define _rw_rlock(rw, f, l) \ __rw_rlock_int((struct rwlock *)rw) #define _rw_runlock(rw, f, l) \ _rw_runlock_cookie_int((struct rwlock *)rw) #endif #if LOCK_DEBUG > 0 #define _rw_wlock_hard(rw, v, f, l) \ __rw_wlock_hard(&(rw)->rw_lock, v, f, l) #define _rw_wunlock_hard(rw, v, f, l) \ __rw_wunlock_hard(&(rw)->rw_lock, v, f, l) #define _rw_try_upgrade(rw, f, l) \ __rw_try_upgrade(&(rw)->rw_lock, f, l) #define _rw_downgrade(rw, f, l) \ __rw_downgrade(&(rw)->rw_lock, f, l) #else #define _rw_wlock_hard(rw, v, f, l) \ __rw_wlock_hard(&(rw)->rw_lock, v) #define _rw_wunlock_hard(rw, v, f, l) \ __rw_wunlock_hard(&(rw)->rw_lock, v) #define _rw_try_upgrade(rw, f, l) \ __rw_try_upgrade_int(rw) #define _rw_downgrade(rw, f, l) \ __rw_downgrade_int(rw) #endif #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define _rw_assert(rw, w, f, l) \ __rw_assert(&(rw)->rw_lock, w, f, l) #endif /* * Public interface for lock operations. */ #ifndef LOCK_DEBUG #error LOCK_DEBUG not defined, include before #endif #if LOCK_DEBUG > 0 || defined(RWLOCK_NOINLINE) #define rw_wlock(rw) _rw_wlock((rw), LOCK_FILE, LOCK_LINE) #define rw_wunlock(rw) _rw_wunlock((rw), LOCK_FILE, LOCK_LINE) #else #define rw_wlock(rw) \ __rw_wlock((rw), curthread, LOCK_FILE, LOCK_LINE) #define rw_wunlock(rw) \ __rw_wunlock((rw), curthread, LOCK_FILE, LOCK_LINE) #endif #define rw_rlock(rw) _rw_rlock((rw), LOCK_FILE, LOCK_LINE) #define rw_runlock(rw) _rw_runlock((rw), LOCK_FILE, LOCK_LINE) #define rw_try_rlock(rw) _rw_try_rlock((rw), LOCK_FILE, LOCK_LINE) #define rw_try_upgrade(rw) _rw_try_upgrade((rw), LOCK_FILE, LOCK_LINE) #define rw_try_wlock(rw) _rw_try_wlock((rw), LOCK_FILE, LOCK_LINE) #define rw_downgrade(rw) _rw_downgrade((rw), LOCK_FILE, LOCK_LINE) #define rw_unlock(rw) do { \ if (rw_wowned(rw)) \ rw_wunlock(rw); \ else \ rw_runlock(rw); \ } while (0) #define rw_sleep(chan, rw, pri, wmesg, timo) \ _sleep((chan), &(rw)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define rw_initialized(rw) lock_initialized(&(rw)->lock_object) struct rw_args { void *ra_rw; const char *ra_desc; int ra_flags; }; #define RW_SYSINIT_FLAGS(name, rw, desc, flags) \ static struct rw_args name##_args = { \ (rw), \ (desc), \ (flags), \ }; \ SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ rw_sysinit, &name##_args); \ SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock)) #define RW_SYSINIT(name, rw, desc) RW_SYSINIT_FLAGS(name, rw, desc, 0) /* * Options passed to rw_init_flags(). */ #define RW_DUPOK 0x01 #define RW_NOPROFILE 0x02 #define RW_NOWITNESS 0x04 #define RW_QUIET 0x08 #define RW_RECURSE 0x10 #define RW_NEW 0x20 /* * The INVARIANTS-enabled rw_assert() functionality. 
* * The constants need to be defined for INVARIANT_SUPPORT infrastructure * support as _rw_assert() itself uses them and the latter implies that * _rw_assert() must build. */ #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define RA_LOCKED LA_LOCKED #define RA_RLOCKED LA_SLOCKED #define RA_WLOCKED LA_XLOCKED #define RA_UNLOCKED LA_UNLOCKED #define RA_RECURSED LA_RECURSED #define RA_NOTRECURSED LA_NOTRECURSED #endif #ifdef INVARIANTS #define rw_assert(rw, what) _rw_assert((rw), (what), LOCK_FILE, LOCK_LINE) #else #define rw_assert(rw, what) #endif #endif /* _KERNEL */ #endif /* !_SYS_RWLOCK_H_ */ Index: head/sys/sys/sleepqueue.h =================================================================== --- head/sys/sys/sleepqueue.h (revision 344854) +++ head/sys/sys/sleepqueue.h (revision 344855) @@ -1,122 +1,121 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SLEEPQUEUE_H_ #define _SYS_SLEEPQUEUE_H_ /* * Sleep queue interface. Sleep/wakeup, condition variables, and sx * locks use a sleep queue for the queue of threads blocked on a sleep * channel. * * A thread calls sleepq_lock() to lock the sleep queue chain associated * with a given wait channel. A thread can then call sleepq_add() to * add itself onto a sleep queue and call one of the sleepq_wait() * functions to actually go to sleep. If a thread needs to abort a sleep * operation it should call sleepq_release() to unlock the associated sleep * queue chain lock. If the thread also needs to remove itself from a queue * it just enqueued itself on, it can use sleepq_remove() instead. * * If the thread only wishes to sleep for a limited amount of time, it can * call sleepq_set_timeout() after sleepq_add() to set up a timeout. It * should then use one of the sleepq_timedwait() functions to block. * * A thread is normally resumed from a sleep queue by either the * sleepq_signal() or sleepq_broadcast() functions. Sleepq_signal() wakes * the thread with the highest priority that is sleeping on the specified * wait channel. Sleepq_broadcast() wakes all threads that are sleeping * on the specified wait channel. A thread sleeping in an interruptible * sleep can be interrupted by calling sleepq_abort().
A thread can also * be removed from a specified sleep queue using the sleepq_remove() * function. Note that the sleep queue chain must first be locked via * sleepq_lock() before calling sleepq_abort(), sleepq_broadcast(), or * sleepq_signal(). These routines each return a boolean that will be true * if at least one swapped-out thread was resumed. In that case, the caller * is responsible for waking up the swapper by calling kick_proc0() after * releasing the sleep queue chain lock. * * Each thread allocates a sleep queue at thread creation via sleepq_alloc() * and releases it at thread destruction via sleepq_free(). Note that * a sleep queue is not tied to a specific thread and that the sleep queue * released at thread destruction may not be the same sleep queue that the * thread allocated when it was created. * * XXX: Some other parts of the kernel such as ithread sleeping may end up * using this interface as well (death to TDI_IWAIT!) */ struct lock_object; struct sleepqueue; struct thread; #ifdef _KERNEL #define SLEEPQ_TYPE 0x0ff /* Mask of sleep queue types. */ #define SLEEPQ_SLEEP 0x00 /* Used by sleep/wakeup. */ #define SLEEPQ_CONDVAR 0x01 /* Used for a cv. */ #define SLEEPQ_PAUSE 0x02 /* Used by pause. */ #define SLEEPQ_SX 0x03 /* Used by an sx lock. */ #define SLEEPQ_LK 0x04 /* Used by a lockmgr. */ #define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */ void init_sleepqueues(void); int sleepq_abort(struct thread *td, int intrval); void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue); struct sleepqueue *sleepq_alloc(void); int sleepq_broadcast(void *wchan, int flags, int pri, int queue); void sleepq_chains_remove_matching(bool (*matches)(struct thread *)); void sleepq_free(struct sleepqueue *sq); void sleepq_lock(void *wchan); struct sleepqueue *sleepq_lookup(void *wchan); void sleepq_release(void *wchan); void sleepq_remove(struct thread *td, void *wchan); int sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri); int sleepq_signal(void *wchan, int flags, int pri, int queue); void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags); #define sleepq_set_timeout(wchan, timo) \ sleepq_set_timeout_sbt((wchan), tick_sbt * (timo), 0, C_HARDCLOCK) u_int sleepq_sleepcnt(void *wchan, int queue); int sleepq_timedwait(void *wchan, int pri); int sleepq_timedwait_sig(void *wchan, int pri); int sleepq_type(void *wchan); void sleepq_wait(void *wchan, int pri); int sleepq_wait_sig(void *wchan, int pri); #ifdef STACK struct sbuf; int sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue, int *count_stacks_printed); #endif #endif /* _KERNEL */ #endif /* !_SYS_SLEEPQUEUE_H_ */ Index: head/sys/sys/turnstile.h =================================================================== --- head/sys/sys/turnstile.h (revision 344854) +++ head/sys/sys/turnstile.h (revision 344855) @@ -1,107 +1,106 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_TURNSTILE_H_ #define _SYS_TURNSTILE_H_ /* * Turnstile interface. Non-sleepable locks use a turnstile for the * queue of threads blocked on them when they are contested. Each * turnstile contains two sub-queues: one for threads waiting for a * shared, or read, lock, and one for threads waiting for an * exclusive, or write, lock. * * A thread calls turnstile_chain_lock() to lock the turnstile chain * associated with a given lock. A thread calls turnstile_wait() when * the lock is contested to be put on the queue and block. If a thread * calls turnstile_trywait() and decides to retry a lock operation instead * of blocking, it should call turnstile_cancel() to unlock the associated * turnstile chain lock. * * When a lock is released, the thread calls turnstile_lookup() to look * up the turnstile associated with the given lock in the hash table. Then * it calls either turnstile_signal() or turnstile_broadcast() to mark * blocked threads for a pending wakeup. turnstile_signal() marks the * highest priority blocked thread while turnstile_broadcast() marks all * blocked threads. The turnstile_signal() function returns true if the * turnstile became empty as a result. After the higher level code finishes * releasing the lock, turnstile_unpend() must be called to wake up the * pending thread(s) and give up ownership of the turnstile. * * Alternatively, if a thread wishes to relinquish ownership of a lock * without waking up any waiters, it may call turnstile_disown(). * * When a lock is acquired that already has at least one thread contested * on it, the new owner of the lock must claim ownership of the turnstile * via turnstile_claim(). * * Each thread allocates a turnstile at thread creation via turnstile_alloc() * and releases it at thread destruction via turnstile_free(). Note that * a turnstile is not tied to a specific thread and that the turnstile * released at thread destruction may not be the same turnstile that the * thread allocated when it was created. * * The highest priority thread blocked on a specified queue of a * turnstile can be obtained via turnstile_head(). A given queue can * also be queried to see if it is empty via turnstile_empty(). */ struct lock_object; struct thread; struct turnstile; #ifdef _KERNEL /* Which queue to block on or which queue to wakeup one or more threads from. 
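 *
 * For illustration, a sketch of a contested exclusive-lock release using
 * these queues (the "lock" variable is hypothetical):
 *
 *	turnstile_chain_lock(&lock->lock_object);
 *	ts = turnstile_lookup(&lock->lock_object);
 *	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
 *	... clear the lock word itself ...
 *	turnstile_unpend(ts);
 *	turnstile_chain_unlock(&lock->lock_object);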
*/ #define TS_EXCLUSIVE_QUEUE 0 #define TS_SHARED_QUEUE 1 void init_turnstiles(void); void turnstile_adjust(struct thread *, u_char); struct turnstile *turnstile_alloc(void); void turnstile_broadcast(struct turnstile *, int); void turnstile_cancel(struct turnstile *); void turnstile_chain_lock(struct lock_object *); void turnstile_chain_unlock(struct lock_object *); void turnstile_claim(struct turnstile *); void turnstile_disown(struct turnstile *); int turnstile_empty(struct turnstile *ts, int queue); void turnstile_free(struct turnstile *); struct thread *turnstile_head(struct turnstile *, int); struct turnstile *turnstile_lookup(struct lock_object *); int turnstile_signal(struct turnstile *, int); struct turnstile *turnstile_trywait(struct lock_object *); void turnstile_unpend(struct turnstile *); void turnstile_wait(struct turnstile *, struct thread *, int); struct thread *turnstile_lock(struct turnstile *, struct lock_object **); void turnstile_unlock(struct turnstile *, struct lock_object *); void turnstile_assert(struct turnstile *); #endif /* _KERNEL */ #endif /* _SYS_TURNSTILE_H_ */ Index: head/sys/x86/acpica/madt.c =================================================================== --- head/sys/x86/acpica/madt.c (revision 344854) +++ head/sys/x86/acpica/madt.c (revision 344855) @@ -1,762 +1,761 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* These two arrays are indexed by APIC IDs. 
*/ static struct { void *io_apic; UINT32 io_vector; } *ioapics; static struct lapic_info { u_int la_enabled; u_int la_acpi_id; } *lapics; int madt_found_sci_override; static ACPI_TABLE_MADT *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; static MALLOC_DEFINE(M_MADT, "madt_table", "ACPI MADT Table Items"); static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source); static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source); static int madt_find_cpu(u_int acpi_id, u_int *apic_id); static int madt_find_interrupt(int intr, void **apic, u_int *pin); static void madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg); static void madt_parse_interrupt_override( ACPI_MADT_INTERRUPT_OVERRIDE *intr); static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi); static void madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi); static int madt_probe(void); static int madt_probe_cpus(void); static void madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_setup_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_register(void *dummy); static int madt_setup_local(void); static int madt_setup_io(void); static void madt_walk_table(acpi_subtable_handler *handler, void *arg); static struct apic_enumerator madt_enumerator = { .apic_name = "MADT", .apic_probe = madt_probe, .apic_probe_cpus = madt_probe_cpus, .apic_setup_local = madt_setup_local, .apic_setup_io = madt_setup_io }; /* * Look for an ACPI Multiple APIC Description Table ("APIC"). */ static int madt_probe(void) { madt_physaddr = acpi_find_table(ACPI_SIG_MADT); if (madt_physaddr == 0) return (ENXIO); return (-50); } /* * Run through the MADT enumerating CPUs. */ static int madt_probe_cpus(void) { madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT); KASSERT(madt != NULL, ("Unable to re-map MADT")); madt_length = madt->Header.Length; madt_walk_table(madt_probe_cpus_handler, NULL); acpi_unmap_table(madt); madt = NULL; return (0); } /* * Initialize the local APIC on the BSP. */ static int madt_setup_local(void) { ACPI_TABLE_DMAR *dmartbl; vm_paddr_t dmartbl_physaddr; const char *reason; char *hw_vendor; u_int p[4]; int user_x2apic; bool bios_x2apic; if ((cpu_feature2 & CPUID2_X2APIC) != 0) { reason = NULL; /* * Automatically detect several configurations where * x2APIC mode is known to cause trouble. The user can * override the setting with the hw.x2apic_enable tunable. */ dmartbl_physaddr = acpi_find_table(ACPI_SIG_DMAR); if (dmartbl_physaddr != 0) { dmartbl = acpi_map_table(dmartbl_physaddr, ACPI_SIG_DMAR); if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0) reason = "by DMAR table"; acpi_unmap_table(dmartbl); } if (vm_guest == VM_GUEST_VMWARE) { vmware_hvcall(VMW_HVCMD_GETVCPU_INFO, p); if ((p[0] & VMW_VCPUINFO_VCPU_RESERVED) != 0 || (p[0] & VMW_VCPUINFO_LEGACY_X2APIC) == 0) reason = "inside VMWare without intr redirection"; } else if (vm_guest == VM_GUEST_XEN) { reason = "due to running under XEN"; } else if (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) == 0x2a) { hw_vendor = kern_getenv("smbios.planar.maker"); /* * It seems that some Lenovo and ASUS * SandyBridge-based notebook BIOSes have a * bug which prevents booting APs in x2APIC * mode. Since the only way to detect mobile * CPU is to check northbridge pci id, which * cannot be done that early, disable x2APIC * for all Lenovo and ASUS SandyBridge * machines.
*/ if (hw_vendor != NULL) { if (!strcmp(hw_vendor, "LENOVO") || !strcmp(hw_vendor, "ASUSTeK Computer Inc.")) { reason = "for a suspected SandyBridge BIOS bug"; } freeenv(hw_vendor); } } bios_x2apic = lapic_is_x2apic(); if (reason != NULL && bios_x2apic) { if (bootverbose) printf("x2APIC should be disabled %s but " "already enabled by BIOS; keeping it enabled.\n", reason); reason = NULL; } if (reason == NULL) x2apic_mode = 1; else if (bootverbose) printf("x2APIC available but disabled %s\n", reason); user_x2apic = x2apic_mode; TUNABLE_INT_FETCH("hw.x2apic_enable", &user_x2apic); if (user_x2apic != x2apic_mode) { if (bios_x2apic && !user_x2apic) printf("x2APIC disabled by tunable and " "enabled by BIOS; ignoring tunable.\n"); else x2apic_mode = user_x2apic; } } /* * Truncate max_apic_id if not in x2APIC mode. Some structures * will already be allocated with the previous max_apic_id, but * at least we can prevent wasting more memory elsewhere. */ if (!x2apic_mode) max_apic_id = min(max_apic_id, xAPIC_MAX_APIC_ID); madt = pmap_mapbios(madt_physaddr, madt_length); lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_MADT, M_WAITOK | M_ZERO); madt_walk_table(madt_setup_cpus_handler, NULL); lapic_init(madt->Address); printf("ACPI APIC Table: <%.*s %.*s>\n", (int)sizeof(madt->Header.OemId), madt->Header.OemId, (int)sizeof(madt->Header.OemTableId), madt->Header.OemTableId); /* * We ignore 64-bit local APIC override entries. Should we * perhaps emit a warning here if we find one? */ return (0); } /* * Enumerate I/O APICs and set up interrupt sources. */ static int madt_setup_io(void) { void *ioapic; u_int pin; int i; KASSERT(lapics != NULL, ("local APICs not initialized")); /* Try to initialize ACPI so that we can access the FADT. */ i = acpi_Startup(); if (ACPI_FAILURE(i)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(i)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } ioapics = malloc(sizeof(*ioapics) * (IOAPIC_MAX_ID + 1), M_MADT, M_WAITOK | M_ZERO); /* First, we run through adding I/O APICs. */ madt_walk_table(madt_parse_apics, NULL); /* Second, we run through the table tweaking interrupt sources. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { if (madt_find_interrupt(AcpiGbl_FADT.SciInterrupt, &ioapic, &pin) != 0) printf("MADT: Could not find APIC for SCI IRQ %u\n", AcpiGbl_FADT.SciInterrupt); else { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ioapic_set_polarity(ioapic, pin, INTR_POLARITY_LOW); ioapic_set_triggermode(ioapic, pin, INTR_TRIGGER_LEVEL); } } /* Third, we register all the I/O APICs. */ for (i = 0; i <= IOAPIC_MAX_ID; i++) if (ioapics[i].io_apic != NULL) ioapic_register(ioapics[i].io_apic); /* Finally, we throw the switch to enable the I/O APICs. */ acpi_SetDefaultIntrModel(ACPI_INTR_APIC); free(ioapics, M_MADT); ioapics = NULL; /* NB: this is the last use of the lapics array. */ free(lapics, M_MADT); lapics = NULL; return (0); } static void madt_register(void *dummy __unused) { apic_register_enumerator(&madt_enumerator); } SYSINIT(madt_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, madt_register, NULL); /* * Call the handler routine for each entry in the MADT table.
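 * The walk starts at madt + 1, i.e. just past the fixed MADT header, and
 * stops Header.Length bytes from the start of the table, so only the
 * variable-length subtables are visited.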
*/ static void madt_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, handler, arg); } static void madt_parse_cpu(unsigned int apic_id, unsigned int flags) { if (!(flags & ACPI_MADT_ENABLED) || #ifdef SMP mp_ncpus == MAXCPU || #endif apic_id > MAX_APIC_ID) return; #ifdef SMP mp_ncpus++; mp_maxid = mp_ncpus - 1; #endif max_apic_id = max(apic_id, max_apic_id); } static void madt_add_cpu(u_int acpi_id, u_int apic_id, u_int flags) { struct lapic_info *la; /* * The MADT does not include a BSP flag, so we have to let the * MP code figure out which CPU is the BSP on its own. */ if (bootverbose) printf("MADT: Found CPU APIC ID %u ACPI ID %u: %s\n", apic_id, acpi_id, flags & ACPI_MADT_ENABLED ? "enabled" : "disabled"); if (!(flags & ACPI_MADT_ENABLED)) return; if (apic_id > max_apic_id) { printf("MADT: Ignoring local APIC ID %u (too high)\n", apic_id); return; } la = &lapics[apic_id]; KASSERT(la->la_enabled == 0, ("Duplicate local APIC ID %u", apic_id)); la->la_enabled = 1; la->la_acpi_id = acpi_id; lapic_create(apic_id, 0); } static void madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_LOCAL_APIC *proc; ACPI_MADT_LOCAL_X2APIC *x2apic; switch (entry->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: proc = (ACPI_MADT_LOCAL_APIC *)entry; madt_parse_cpu(proc->Id, proc->LapicFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC: x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry; madt_parse_cpu(x2apic->LocalApicId, x2apic->LapicFlags); break; } } static void madt_setup_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_LOCAL_APIC *proc; ACPI_MADT_LOCAL_X2APIC *x2apic; switch (entry->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: proc = (ACPI_MADT_LOCAL_APIC *)entry; madt_add_cpu(proc->ProcessorId, proc->Id, proc->LapicFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC: x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry; madt_add_cpu(x2apic->Uid, x2apic->LocalApicId, x2apic->LapicFlags); break; } } /* * Add an I/O APIC from an entry in the table. */ static void madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { ACPI_MADT_IO_APIC *apic; switch (entry->Type) { case ACPI_MADT_TYPE_IO_APIC: apic = (ACPI_MADT_IO_APIC *)entry; if (bootverbose) printf( "MADT: Found IO APIC ID %u, Interrupt %u at %p\n", apic->Id, apic->GlobalIrqBase, (void *)(uintptr_t)apic->Address); if (apic->Id > IOAPIC_MAX_ID) panic("%s: I/O APIC ID %u too high", __func__, apic->Id); if (ioapics[apic->Id].io_apic != NULL) panic("%s: Double APIC ID %u", __func__, apic->Id); ioapics[apic->Id].io_apic = ioapic_create(apic->Address, apic->Id, apic->GlobalIrqBase); ioapics[apic->Id].io_vector = apic->GlobalIrqBase; break; default: break; } } /* * Determine properties of an interrupt source. Note that for ACPI these * functions are only used for ISA interrupts, so we assume ISA bus values * (Active Hi, Edge Triggered) for conforming values except for the ACPI * SCI for which we use Active Lo, Level Triggered. */ static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source) { switch (IntiFlags & ACPI_MADT_POLARITY_MASK) { default: printf("WARNING: Bogus Interrupt Polarity. 
Assume CONFORMS\n"); /* FALLTHROUGH*/ case ACPI_MADT_POLARITY_CONFORMS: if (Source == AcpiGbl_FADT.SciInterrupt) return (INTR_POLARITY_LOW); else return (INTR_POLARITY_HIGH); case ACPI_MADT_POLARITY_ACTIVE_HIGH: return (INTR_POLARITY_HIGH); case ACPI_MADT_POLARITY_ACTIVE_LOW: return (INTR_POLARITY_LOW); } } static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source) { switch (IntiFlags & ACPI_MADT_TRIGGER_MASK) { default: printf("WARNING: Bogus Interrupt Trigger Mode. Assume CONFORMS.\n"); /*FALLTHROUGH*/ case ACPI_MADT_TRIGGER_CONFORMS: if (Source == AcpiGbl_FADT.SciInterrupt) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); case ACPI_MADT_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case ACPI_MADT_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); } } /* * Find the local APIC ID associated with a given ACPI Processor ID. */ static int madt_find_cpu(u_int acpi_id, u_int *apic_id) { int i; for (i = 0; i <= max_apic_id; i++) { if (!lapics[i].la_enabled) continue; if (lapics[i].la_acpi_id != acpi_id) continue; *apic_id = i; return (0); } return (ENOENT); } /* * Find the IO APIC and pin on that APIC associated with a given global * interrupt. */ static int madt_find_interrupt(int intr, void **apic, u_int *pin) { int i, best; best = -1; for (i = 0; i <= IOAPIC_MAX_ID; i++) { if (ioapics[i].io_apic == NULL || ioapics[i].io_vector > intr) continue; if (best == -1 || ioapics[best].io_vector < ioapics[i].io_vector) best = i; } if (best == -1) return (ENOENT); *apic = ioapics[best].io_apic; *pin = intr - ioapics[best].io_vector; if (*pin > 32) printf("WARNING: Found intpin of %u for vector %d\n", *pin, intr); return (0); } void madt_parse_interrupt_values(void *entry, enum intr_trigger *trig, enum intr_polarity *pol) { ACPI_MADT_INTERRUPT_OVERRIDE *intr; char buf[64]; intr = entry; if (bootverbose) printf("MADT: Interrupt override: source %u, irq %u\n", intr->SourceIrq, intr->GlobalIrq); KASSERT(intr->Bus == 0, ("bus for interrupt overrides must be zero")); /* * Lookup the appropriate trigger and polarity modes for this * entry. */ *trig = interrupt_trigger(intr->IntiFlags, intr->SourceIrq); *pol = interrupt_polarity(intr->IntiFlags, intr->SourceIrq); /* * If the SCI is identity mapped but has edge trigger and * active-hi polarity or the force_sci_lo tunable is set, * force it to use level/lo. */ if (intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) { madt_found_sci_override = 1; if (getenv_string("hw.acpi.sci.trigger", buf, sizeof(buf))) { if (tolower(buf[0]) == 'e') *trig = INTR_TRIGGER_EDGE; else if (tolower(buf[0]) == 'l') *trig = INTR_TRIGGER_LEVEL; else panic( "Invalid trigger %s: must be 'edge' or 'level'", buf); printf("MADT: Forcing SCI to %s trigger\n", *trig == INTR_TRIGGER_EDGE ? "edge" : "level"); } if (getenv_string("hw.acpi.sci.polarity", buf, sizeof(buf))) { if (tolower(buf[0]) == 'h') *pol = INTR_POLARITY_HIGH; else if (tolower(buf[0]) == 'l') *pol = INTR_POLARITY_LOW; else panic( "Invalid polarity %s: must be 'high' or 'low'", buf); printf("MADT: Forcing SCI to active %s polarity\n", *pol == INTR_POLARITY_HIGH ? "high" : "low"); } } } /* * Parse an interrupt source override for an ISA interrupt. 
*/ static void madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { void *new_ioapic, *old_ioapic; u_int new_pin, old_pin; enum intr_trigger trig; enum intr_polarity pol; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } if (madt_find_interrupt(intr->GlobalIrq, &new_ioapic, &new_pin) != 0) { printf("MADT: Could not find APIC for vector %u (IRQ %u)\n", intr->GlobalIrq, intr->SourceIrq); return; } madt_parse_interrupt_values(intr, &trig, &pol); /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->SourceIrq != intr->GlobalIrq) { /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup and allocate * the interrupt. */ if (intr->GlobalIrq > 15 && intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) acpi_OverrideInterruptLevel(intr->GlobalIrq); else ioapic_remap_vector(new_ioapic, new_pin, intr->SourceIrq); if (madt_find_interrupt(intr->SourceIrq, &old_ioapic, &old_pin) != 0) printf("MADT: Could not find APIC for source IRQ %u\n", intr->SourceIrq); else if (ioapic_get_vector(old_ioapic, old_pin) == intr->SourceIrq) ioapic_disable_pin(old_ioapic, old_pin); } /* Program the polarity and trigger mode. */ ioapic_set_triggermode(new_ioapic, new_pin, trig); ioapic_set_polarity(new_ioapic, new_pin, pol); } /* * Parse an entry for an NMI routed to an IO APIC. */ static void madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi) { void *ioapic; u_int pin; if (madt_find_interrupt(nmi->GlobalIrq, &ioapic, &pin) != 0) { printf("MADT: Could not find APIC for vector %u\n", nmi->GlobalIrq); return; } ioapic_set_nmi(ioapic, pin); if (!(nmi->IntiFlags & ACPI_MADT_TRIGGER_CONFORMS)) ioapic_set_triggermode(ioapic, pin, interrupt_trigger(nmi->IntiFlags, 0)); if (!(nmi->IntiFlags & ACPI_MADT_POLARITY_CONFORMS)) ioapic_set_polarity(ioapic, pin, interrupt_polarity(nmi->IntiFlags, 0)); } /* * Parse an entry for an NMI routed to a local APIC LVT pin. */ static void madt_handle_local_nmi(u_int acpi_id, UINT8 Lint, UINT16 IntiFlags) { u_int apic_id, pin; if (acpi_id == 0xffffffff) apic_id = APIC_ID_ALL; else if (madt_find_cpu(acpi_id, &apic_id) != 0) { if (bootverbose) printf("MADT: Ignoring local NMI routed to " "ACPI CPU %u\n", acpi_id); return; } if (Lint == 0) pin = APIC_LVT_LINT0; else pin = APIC_LVT_LINT1; lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); if (!(IntiFlags & ACPI_MADT_TRIGGER_CONFORMS)) lapic_set_lvt_triggermode(apic_id, pin, interrupt_trigger(IntiFlags, 0)); if (!(IntiFlags & ACPI_MADT_POLARITY_CONFORMS)) lapic_set_lvt_polarity(apic_id, pin, interrupt_polarity(IntiFlags, 0)); } static void madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi) { madt_handle_local_nmi(nmi->ProcessorId == 0xff ? 0xffffffff : nmi->ProcessorId, nmi->Lint, nmi->IntiFlags); } static void madt_parse_local_x2apic_nmi(ACPI_MADT_LOCAL_X2APIC_NMI *nmi) { madt_handle_local_nmi(nmi->Uid, nmi->Lint, nmi->IntiFlags); } /* * Parse interrupt entries. 
*/ static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { switch (entry->Type) { case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE: madt_parse_interrupt_override( (ACPI_MADT_INTERRUPT_OVERRIDE *)entry); break; case ACPI_MADT_TYPE_NMI_SOURCE: madt_parse_nmi((ACPI_MADT_NMI_SOURCE *)entry); break; case ACPI_MADT_TYPE_LOCAL_APIC_NMI: madt_parse_local_nmi((ACPI_MADT_LOCAL_APIC_NMI *)entry); break; case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI: madt_parse_local_x2apic_nmi( (ACPI_MADT_LOCAL_X2APIC_NMI *)entry); break; } } /* * Setup per-CPU ACPI IDs. */ static void madt_set_ids(void *dummy) { struct lapic_info *la; struct pcpu *pc; u_int i; if (madt == NULL) return; KASSERT(lapics != NULL, ("local APICs not initialized")); CPU_FOREACH(i) { pc = pcpu_find(i); KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); la = &lapics[pc->pc_apic_id]; if (!la->la_enabled) panic("APIC: CPU with APIC ID %u is not enabled", pc->pc_apic_id); pc->pc_acpi_id = la->la_acpi_id; if (bootverbose) printf("APIC: CPU %u has ACPI ID %u\n", i, la->la_acpi_id); } } SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_MIDDLE, madt_set_ids, NULL); Index: head/sys/x86/include/apicvar.h =================================================================== --- head/sys/x86/include/apicvar.h (revision 344854) +++ head/sys/x86/include/apicvar.h (revision 344855) @@ -1,492 +1,491 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_APICVAR_H_ #define _X86_APICVAR_H_ /* * Local && I/O APIC variable definitions. 
*/ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define xAPIC_MAX_APIC_ID 0xfe #define xAPIC_ID_ALL 0xff #define MAX_APIC_ID 0x200 #define APIC_ID_ALL 0xffffffff #define IOAPIC_MAX_ID xAPIC_MAX_APIC_ID /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local APIC has an interrupt receive FIFO that is two entries deep * for each interrupt priority class (the upper 4 bits of the interrupt * vector). * Once the FIFO is full, the APIC can no longer receive interrupts for that * class, and sending IPIs from other CPUs will be blocked. * To avoid deadlocks, there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs into two groups, each of * which has at most one IPI interrupt pending. The first group is protected * by the smp_ipi_mtx and waits for the completion of the IPI (only one IPI * user at a time). The second group uses a single interrupt and a bitmap to * avoid redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAP_VECTOR */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */ #define IPI_DYN_FIRST (APIC_IPI_INTS + 8) #define IPI_DYN_LAST (253) /* IPIs allocated at runtime */ /* * IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since * it is delivered using an NMI anyway.
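 *
 * (A concrete reading of the FIFO rule above, for illustration: a vector's
 * priority class is its upper four bits, i.e. class = vector >> 4, so
 * IPI_RENDEZVOUS at vector 243 falls in class 15, and all vectors from
 * 0xf0 through 0xff compete for the same two-entry FIFO.)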
*/ #define IPI_NMI_FIRST 254 #define IPI_TRACE 254 /* Interrupt for tracing. */ #define IPI_STOP_HARD 255 /* Stop CPU with an NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (It does not use the APIC's interrupt FIFO.) */ #define APIC_SPURIOUS_INT 255 #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI #define IRQ_EXTINT -1 #define IRQ_NMI -2 #define IRQ_SMI -3 #define IRQ_DISABLED -4 /* * An APIC enumerator is a pseudo bus driver that enumerates APICs, including * CPUs and I/O APICs. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint), IDTVEC(apic_isr1_pti), IDTVEC(apic_isr2_pti), IDTVEC(apic_isr3_pti), IDTVEC(apic_isr4_pti), IDTVEC(apic_isr5_pti), IDTVEC(apic_isr6_pti), IDTVEC(apic_isr7_pti), IDTVEC(cmcint_pti), IDTVEC(errorint_pti), IDTVEC(spuriousint_pti), IDTVEC(timerint_pti); extern vm_paddr_t lapic_paddr; extern int *apic_cpuids; void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); /* * Struct containing pointers to APIC functions whose * implementation is run time selectable.
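 * The lapic_* and apic_* inlines below are thin dispatch stubs: a call such
 * as lapic_eoi() simply expands to apic_ops.eoi(). Selecting a different
 * local APIC access method (for instance the x2APIC interface when
 * x2apic_mode is set) is then just a matter of filling in this one table.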
*/ struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); void (*xapic_mode)(void); bool (*is_x2apic)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); void (*eoi)(void); int (*id)(void); int (*intr_pending)(u_int); void (*set_logical_id)(u_int, u_int, u_int); u_int (*cpuid)(u_int); /* Vectors */ u_int (*alloc_vector)(u_int, u_int); u_int (*alloc_vectors)(u_int, u_int *, u_int, u_int); void (*enable_vector)(u_int, u_int); void (*disable_vector)(u_int, u_int); void (*free_vector)(u_int, u_int, u_int); /* PMC */ int (*enable_pmc)(void); void (*disable_pmc)(void); void (*reenable_pmc)(void); /* CMC */ void (*enable_cmc)(void); /* AMD ELVT */ int (*enable_mca_elvt)(void); /* IPI */ void (*ipi_raw)(register_t, u_int); void (*ipi_vectored)(u_int, int); int (*ipi_wait)(int); int (*ipi_alloc)(inthand_t *ipifunc); void (*ipi_free)(int vector); /* LVT */ int (*set_lvt_mask)(u_int, u_int, u_char); int (*set_lvt_mode)(u_int, u_int, u_int32_t); int (*set_lvt_polarity)(u_int, u_int, enum intr_polarity); int (*set_lvt_triggermode)(u_int, u_int, enum intr_trigger); }; extern struct apic_ops apic_ops; static inline void lapic_create(u_int apic_id, int boot_cpu) { apic_ops.create(apic_id, boot_cpu); } static inline void lapic_init(vm_paddr_t addr) { apic_ops.init(addr); } static inline void lapic_xapic_mode(void) { apic_ops.xapic_mode(); } static inline bool lapic_is_x2apic(void) { return (apic_ops.is_x2apic()); } static inline void lapic_setup(int boot) { apic_ops.setup(boot); } static inline void lapic_dump(const char *str) { apic_ops.dump(str); } static inline void lapic_disable(void) { apic_ops.disable(); } static inline void lapic_eoi(void) { apic_ops.eoi(); } static inline int lapic_id(void) { return (apic_ops.id()); } static inline int lapic_intr_pending(u_int vector) { return (apic_ops.intr_pending(vector)); } /* XXX: UNUSED */ static inline void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { apic_ops.set_logical_id(apic_id, cluster, cluster_id); } static inline u_int apic_cpuid(u_int apic_id) { return (apic_ops.cpuid(apic_id)); } static inline u_int apic_alloc_vector(u_int apic_id, u_int irq) { return (apic_ops.alloc_vector(apic_id, irq)); } static inline u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { return (apic_ops.alloc_vectors(apic_id, irqs, count, align)); } static inline void apic_enable_vector(u_int apic_id, u_int vector) { apic_ops.enable_vector(apic_id, vector); } static inline void apic_disable_vector(u_int apic_id, u_int vector) { apic_ops.disable_vector(apic_id, vector); } static inline void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { apic_ops.free_vector(apic_id, vector, irq); } static inline int lapic_enable_pmc(void) { return (apic_ops.enable_pmc()); } static inline void lapic_disable_pmc(void) { apic_ops.disable_pmc(); } static inline void lapic_reenable_pmc(void) { apic_ops.reenable_pmc(); } static inline void lapic_enable_cmc(void) { apic_ops.enable_cmc(); } static inline int lapic_enable_mca_elvt(void) { return (apic_ops.enable_mca_elvt()); } static inline void lapic_ipi_raw(register_t icrlo, u_int dest) { apic_ops.ipi_raw(icrlo, dest); } static inline void lapic_ipi_vectored(u_int vector, int dest) { apic_ops.ipi_vectored(vector, dest); } static inline int lapic_ipi_wait(int delay) { return (apic_ops.ipi_wait(delay)); } static inline int lapic_ipi_alloc(inthand_t *ipifunc) { return (apic_ops.ipi_alloc(ipifunc)); } static inline void lapic_ipi_free(int vector) { 
apic_ops.ipi_free(vector); } static inline int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { return (apic_ops.set_lvt_mask(apic_id, lvt, masked)); } static inline int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (apic_ops.set_lvt_mode(apic_id, lvt, mode)); } static inline int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (apic_ops.set_lvt_polarity(apic_id, lvt, pol)); } static inline int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (apic_ops.set_lvt_triggermode(apic_id, lvt, trigger)); } void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); int ioapic_get_rid(u_int apic_id, uint16_t *ridp); extern int x2apic_mode; extern int lapic_eoi_suppression; #ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_hw_apic); #endif #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */ Index: head/sys/x86/include/intr_machdep.h =================================================================== --- head/sys/x86/include/intr_machdep.h (revision 344854) +++ head/sys/x86/include/intr_machdep.h (revision 344855) @@ -1,176 +1,175 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_INTR_MACHDEP_H__ #define __X86_INTR_MACHDEP_H__ #ifdef _KERNEL /* * Values used in determining the allocation of IRQ values among * different types of I/O interrupts. These values are used as * indices into an interrupt source array to map I/O interrupts to a * device interrupt source, whether it be a pin on an interrupt * controller or an MSI interrupt. The 16 ISA IRQs are assigned fixed * IDT vectors, but all other device interrupts allocate IDT vectors * on demand. Currently we have 191 IDT vectors available for device * interrupts on each CPU. On many systems with I/O APICs, a lot of * the IRQs are not used, so the total number of IRQ values reserved * can exceed the number of available IDT slots. * * The first 16 IRQs (0 - 15) are reserved for ISA IRQs. Interrupt * pins on I/O APICs for non-ISA interrupts use IRQ values starting at * IRQ 17.
This layout matches the GSI numbering used by ACPI so that * IRQ values returned by ACPI methods such as _CRS can be used * directly by the ACPI bus driver. * * MSI interrupts allocate a block of interrupts starting at the end * of the I/O APIC range. When running under the Xen Hypervisor, an * additional range of IRQ values are available for binding to event * channel events. */ extern u_int first_msi_irq; extern u_int num_io_irqs; extern u_int num_msi_irqs; /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 #ifndef LOCORE typedef void inthand_t(void); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_register_sources)(struct pic *); void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); void (*pic_reprogram_pin)(struct intsrc *); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; u_int is_domain; u_int is_cpu; }; struct trapframe; #ifdef SMP extern cpuset_t intr_cpus; #endif extern struct mtx icu_lock; extern int elcr_found; #ifdef SMP extern int msix_disable_migration; #endif #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. 
*/ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, int domain); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(int domain); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intr_reprogram(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #ifdef XENHVM void xen_intr_alloc_irqs(void); #endif #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__X86_INTR_MACHDEP_H__ */ Index: head/sys/x86/isa/atpic.c =================================================================== --- head/sys/x86/isa/atpic.c (revision 344854) +++ head/sys/x86/isa/atpic.c (revision 344855) @@ -1,633 +1,632 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * PIC driver for the 8259A Master and Slave PICs in PC/AT machines. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_auto_eoi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #define SDT_ATPIC SDT_SYSIGT #define GSEL_ATPIC 0 #else #define SDT_ATPIC SDT_SYS386IGT #define GSEL_ATPIC GSEL(GCODE_SEL, SEL_KPL) #endif #define MASTER 0 #define SLAVE 1 #define IMEN_MASK(ai) (IRQ_MASK((ai)->at_irq)) #define NUM_ISA_IRQS 16 static void atpic_init(void *dummy); inthand_t IDTVEC(atpic_intr0), IDTVEC(atpic_intr1), IDTVEC(atpic_intr2), IDTVEC(atpic_intr3), IDTVEC(atpic_intr4), IDTVEC(atpic_intr5), IDTVEC(atpic_intr6), IDTVEC(atpic_intr7), IDTVEC(atpic_intr8), IDTVEC(atpic_intr9), IDTVEC(atpic_intr10), IDTVEC(atpic_intr11), IDTVEC(atpic_intr12), IDTVEC(atpic_intr13), IDTVEC(atpic_intr14), IDTVEC(atpic_intr15); /* XXXKIB i386 uses stubs until pti comes */ inthand_t IDTVEC(atpic_intr0_pti), IDTVEC(atpic_intr1_pti), IDTVEC(atpic_intr2_pti), IDTVEC(atpic_intr3_pti), IDTVEC(atpic_intr4_pti), IDTVEC(atpic_intr5_pti), IDTVEC(atpic_intr6_pti), IDTVEC(atpic_intr7_pti), IDTVEC(atpic_intr8_pti), IDTVEC(atpic_intr9_pti), IDTVEC(atpic_intr10_pti), IDTVEC(atpic_intr11_pti), IDTVEC(atpic_intr12_pti), IDTVEC(atpic_intr13_pti), IDTVEC(atpic_intr14_pti), IDTVEC(atpic_intr15_pti); #define IRQ(ap, ai) ((ap)->at_irqbase + (ai)->at_irq) #define ATPIC(io, base, eoi) { \ .at_pic = { \ .pic_register_sources = atpic_register_sources, \ .pic_enable_source = atpic_enable_source, \ .pic_disable_source = atpic_disable_source, \ .pic_eoi_source = (eoi), \ .pic_enable_intr = atpic_enable_intr, \ .pic_disable_intr = atpic_disable_intr, \ .pic_vector = atpic_vector, \ .pic_source_pending = atpic_source_pending, \ .pic_resume = atpic_resume, \ .pic_config_intr = atpic_config_intr, \ .pic_assign_cpu = atpic_assign_cpu \ }, \ .at_ioaddr = (io), \ .at_irqbase = (base), \ .at_intbase = IDT_IO_INTS + (base), \ .at_imen = 0xff, \ } #define INTSRC(irq) \ { { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ), \ IDTVEC(atpic_intr ## irq ## _pti), (irq) % 8 } struct atpic { struct pic at_pic; int at_ioaddr; int at_irqbase; uint8_t at_intbase; uint8_t at_imen; }; struct atpic_intsrc { struct intsrc at_intsrc; inthand_t *at_intr, *at_intr_pti; int at_irq; /* Relative to PIC base. 
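 * (For example, global IRQ 9 lives on the slave as at_irq 1, since the
 * slave's at_irqbase is 8; see the IRQ() macro above.)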
*/ enum intr_trigger at_trigger; u_long at_count; u_long at_straycount; }; static void atpic_register_sources(struct pic *pic); static void atpic_enable_source(struct intsrc *isrc); static void atpic_disable_source(struct intsrc *isrc, int eoi); static void atpic_eoi_master(struct intsrc *isrc); static void atpic_eoi_slave(struct intsrc *isrc); static void atpic_enable_intr(struct intsrc *isrc); static void atpic_disable_intr(struct intsrc *isrc); static int atpic_vector(struct intsrc *isrc); static void atpic_resume(struct pic *pic, bool suspend_cancelled); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { ATPIC(IO_ICU1, 0, atpic_eoi_master), ATPIC(IO_ICU2, 8, atpic_eoi_slave) }; static struct atpic_intsrc atintrs[] = { INTSRC(0), INTSRC(1), INTSRC(2), INTSRC(3), INTSRC(4), INTSRC(5), INTSRC(6), INTSRC(7), INTSRC(8), INTSRC(9), INTSRC(10), INTSRC(11), INTSRC(12), INTSRC(13), INTSRC(14), INTSRC(15), }; CTASSERT(nitems(atintrs) == NUM_ISA_IRQS); static __inline void _atpic_eoi_master(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[MASTER].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif } /* * The data sheet says no auto-EOI on slave, but it sometimes works. * So, if AUTO_EOI_2 is enabled, we use it. */ static __inline void _atpic_eoi_slave(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[SLAVE].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_2 outb(atpics[SLAVE].at_ioaddr, OCW2_EOI); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif #endif } static void atpic_register_sources(struct pic *pic) { struct atpic *ap = (struct atpic *)pic; struct atpic_intsrc *ai; int i; /* * If any of the ISA IRQs have an interrupt source already, then * assume that the I/O APICs are being used and don't register any * of our interrupt sources. This makes sure we don't accidentally * use mixed mode. The "accidental" use could otherwise occur on * machines that route the ACPI SCI interrupt to a different ISA * IRQ (at least one machine routes it to IRQ 13), thus disabling * that APIC ISA routing and allowing the ATPIC source for that IRQ * to leak through. We used to depend on this feature for routing * IRQ0 via mixed mode, but now we don't use mixed mode at all. * * To avoid the slave failing to register its sources after the * master registers its own, register all IRQs when this function * is called on the master. */ if (ap != &atpics[MASTER]) return; for (i = 0; i < NUM_ISA_IRQS; i++) if (intr_lookup_source(i) != NULL) return; /* Loop through all interrupt sources and add them.
*/ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; intr_register_source(&ai->at_intsrc); } } static void atpic_enable_source(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ap->at_imen & IMEN_MASK(ai)) { ap->at_imen &= ~IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, ap->at_imen); } spinlock_exit(); } static void atpic_disable_source(struct intsrc *isrc, int eoi) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ai->at_trigger != INTR_TRIGGER_EDGE) { ap->at_imen |= IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, ap->at_imen); } /* * Take care to call these functions directly instead of through * a function pointer. All of the referenced variables should * still be hot in the cache. */ if (eoi == PIC_EOI) { if (isrc->is_pic == &atpics[MASTER].at_pic) _atpic_eoi_master(isrc); else _atpic_eoi_slave(isrc); } spinlock_exit(); } static void atpic_eoi_master(struct intsrc *isrc) { #ifndef AUTO_EOI_1 spinlock_enter(); _atpic_eoi_master(isrc); spinlock_exit(); #endif } static void atpic_eoi_slave(struct intsrc *isrc) { #ifndef AUTO_EOI_2 spinlock_enter(); _atpic_eoi_slave(isrc); spinlock_exit(); #endif } static void atpic_enable_intr(struct intsrc *isrc) { } static void atpic_disable_intr(struct intsrc *isrc) { } static int atpic_vector(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (IRQ(ap, ai)); } static int atpic_source_pending(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (inb(ap->at_ioaddr) & IMEN_MASK(ai)); } static void atpic_resume(struct pic *pic, bool suspend_cancelled) { struct atpic *ap = (struct atpic *)pic; i8259_init(ap, ap == &atpics[SLAVE]); if (ap == &atpics[SLAVE] && elcr_found) elcr_resume(); } static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; u_int vector; /* Map conforming values to edge/hi and sanity check the values. */ if (trig == INTR_TRIGGER_CONFORM) trig = INTR_TRIGGER_EDGE; if (pol == INTR_POLARITY_CONFORM) pol = INTR_POLARITY_HIGH; vector = atpic_vector(isrc); if ((trig == INTR_TRIGGER_EDGE && pol == INTR_POLARITY_LOW) || (trig == INTR_TRIGGER_LEVEL && pol == INTR_POLARITY_HIGH)) { printf( "atpic: Mismatched config for IRQ%u: trigger %s, polarity %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge" : "level", pol == INTR_POLARITY_HIGH ? "high" : "low"); return (EINVAL); } /* If there is no change, just return. */ if (ai->at_trigger == trig) return (0); /* * Certain IRQs can never be level/lo, so don't try to set them * that way if asked. At least some ELCR registers ignore setting * these bits as well. */ if ((vector == 0 || vector == 1 || vector == 2 || vector == 13) && trig == INTR_TRIGGER_LEVEL) { if (bootverbose) printf( "atpic: Ignoring invalid level/low configuration for IRQ%u\n", vector); return (EINVAL); } if (!elcr_found) { if (bootverbose) printf("atpic: No ELCR to configure IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge/high" : "level/low"); return (ENXIO); } if (bootverbose) printf("atpic: Programming IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? 
"edge/high" : "level/low"); spinlock_enter(); elcr_write_trigger(atpic_vector(isrc), trig); ai->at_trigger = trig; spinlock_exit(); return (0); } static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { /* * 8259A's are only used in UP in which case all interrupts always * go to the sole CPU and this function shouldn't even be called. */ panic("%s: bad cookie", __func__); } static void i8259_init(struct atpic *pic, int slave) { int imr_addr; /* Reset the PIC and program with next four bytes. */ spinlock_enter(); outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); imr_addr = pic->at_ioaddr + ICU_IMR_OFFSET; /* Start vector. */ outb(imr_addr, pic->at_intbase); /* * Setup slave links. For the master pic, indicate what line * the slave is configured on. For the slave indicate * which line on the master we are connected to. */ if (slave) outb(imr_addr, ICU_SLAVEID); else outb(imr_addr, IRQ_MASK(ICU_SLAVEID)); /* Set mode. */ if (slave) outb(imr_addr, SLAVE_MODE); else outb(imr_addr, MASTER_MODE); /* Set interrupt enable mask. */ outb(imr_addr, pic->at_imen); /* Reset is finished, default to IRR on read. */ outb(pic->at_ioaddr, OCW3_SEL | OCW3_RR); /* OCW2_L1 sets priority order to 3-7, 0-2 (com2 first). */ if (!slave) outb(pic->at_ioaddr, OCW2_R | OCW2_SL | OCW2_L1); spinlock_exit(); } void atpic_startup(void) { struct atpic_intsrc *ai; int i; /* Start off with all interrupts disabled. */ i8259_init(&atpics[MASTER], 0); i8259_init(&atpics[SLAVE], 1); atpic_enable_source((struct intsrc *)&atintrs[ICU_SLAVEID]); /* Install low-level interrupt handlers for all of our IRQs. */ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; ai->at_intsrc.is_count = &ai->at_count; ai->at_intsrc.is_straycount = &ai->at_straycount; setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase + ai->at_irq, pti ? ai->at_intr_pti : ai->at_intr, SDT_ATPIC, SEL_KPL, GSEL_ATPIC); } /* * Look for an ELCR. If we find one, update the trigger modes. * If we don't find one, assume that IRQs 0, 1, 2, and 13 are * edge triggered and that everything else is level triggered. * We only use the trigger information to reprogram the ELCR if * we have one and as an optimization to avoid masking edge * triggered interrupts. For the case that we don't have an ELCR, * it doesn't hurt to mask an edge triggered interrupt, so we * assume level trigger for any interrupt that we aren't sure is * edge triggered. */ if (elcr_found) { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) ai->at_trigger = elcr_read_trigger(i); } else { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) switch (i) { case 0: case 1: case 2: case 8: case 13: ai->at_trigger = INTR_TRIGGER_EDGE; break; default: ai->at_trigger = INTR_TRIGGER_LEVEL; break; } } } static void atpic_init(void *dummy __unused) { /* * Register our PICs, even if we aren't going to use any of their * pins so that they are suspended and resumed. */ if (intr_register_pic(&atpics[0].at_pic) != 0 || intr_register_pic(&atpics[1].at_pic) != 0) panic("Unable to register ATPICs"); if (num_io_irqs == 0) num_io_irqs = NUM_ISA_IRQS; } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_FOURTH, atpic_init, NULL); void atpic_handle_intr(u_int vector, struct trapframe *frame) { struct intsrc *isrc; KASSERT(vector < NUM_ISA_IRQS, ("unknown int %u\n", vector)); isrc = &atintrs[vector].at_intsrc; /* * If we don't have an event, see if this is a spurious * interrupt. 
*/ if (isrc->is_event == NULL && (vector == 7 || vector == 15)) { int port, isr; /* * Read the ISR register to see if IRQ 7/15 is really * pending. Reset read register back to IRR when done. */ port = ((struct atpic *)isrc->is_pic)->at_ioaddr; spinlock_enter(); outb(port, OCW3_SEL | OCW3_RR | OCW3_RIS); isr = inb(port); outb(port, OCW3_SEL | OCW3_RR); spinlock_exit(); if ((isr & IRQ_MASK(7)) == 0) return; } intr_execute_handlers(isrc, frame); } #ifdef DEV_ISA /* * Bus attachment for the ISA PIC. */ static struct isa_pnp_id atpic_ids[] = { { 0x0000d041 /* PNP0000 */, "AT interrupt controller" }, { 0 } }; static int atpic_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, atpic_ids); if (result <= 0) device_quiet(dev); return (result); } /* * We might be granted IRQ 2, as this is typically consumed by chaining * between the two PIC components. If we're using the APIC, however, * this may not be the case, and as such we should free the resource. * (XXX untested) * * The generic ISA attachment code will handle allocating any other resources * that we don't explicitly claim here. */ static int atpic_attach(device_t dev) { struct resource *res; int rid; /* Try to allocate our IRQ and then free it. */ rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, 0); if (res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, res); return (0); } /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } static device_method_t atpic_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atpic_probe), DEVMETHOD(device_attach, atpic_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t atpic_driver = { "atpic", atpic_methods, 1, /* no softc */ }; static devclass_t atpic_devclass; DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); ISA_PNP_INFO(atpic_ids); #endif /* DEV_ISA */ Index: head/sys/x86/isa/elcr.c =================================================================== --- head/sys/x86/isa/elcr.c (revision 344854) +++ head/sys/x86/isa/elcr.c (revision 344855) @@ -1,138 +1,137 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * The ELCR is a register that controls the trigger mode and polarity of * EISA and ISA interrupts. In FreeBSD 3.x and 4.x, the ELCR was only * consulted for determining the appropriate trigger mode of EISA * interrupts when using an APIC. However, it seems that almost all * systems that include PCI also include an ELCR that manages the ISA * IRQs 0 through 15. Thus, we check for the presence of an ELCR on * every machine by checking to see if the values found at bootup are * sane. Note that the polarity of ISA and EISA IRQs is linked to the * trigger mode. All edge triggered IRQs use active-hi polarity, and * all level triggered interrupts use active-lo polarity. * * The format of the ELCR is simple: it is a 16-bit bitmap where bit 0 * controls IRQ 0, bit 1 controls IRQ 1, etc. If the bit is zero, the * associated IRQ is edge triggered. If the bit is one, the IRQ is * level triggered. */ #include #include #include #include #define ELCR_PORT 0x4d0 #define ELCR_MASK(irq) (1 << (irq)) static int elcr_status; int elcr_found; /* * Check to see if we have what looks like a valid ELCR. We do this by * verifying that IRQs 0, 1, 2, 8, and 13 are all edge triggered. */ int elcr_probe(void) { int i; elcr_status = inb(ELCR_PORT) | inb(ELCR_PORT + 1) << 8; if ((elcr_status & (ELCR_MASK(0) | ELCR_MASK(1) | ELCR_MASK(2) | ELCR_MASK(8) | ELCR_MASK(13))) != 0) return (ENXIO); if (bootverbose) { printf("ELCR Found. ISA IRQs programmed as:\n"); for (i = 0; i < 16; i++) printf(" %2d", i); printf("\n"); for (i = 0; i < 16; i++) if (elcr_status & ELCR_MASK(i)) printf(" L"); else printf(" E"); printf("\n"); } if (resource_disabled("elcr", 0)) return (ENXIO); elcr_found = 1; return (0); } /* * Return the trigger mode (edge or level) of the specified IRQ. */ enum intr_trigger elcr_read_trigger(u_int irq) { KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); KASSERT(irq <= 15, ("%s: invalid IRQ %u", __func__, irq)); if (elcr_status & ELCR_MASK(irq)) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); } /* * Set the trigger mode of the specified IRQ to either edge or level * triggered.
*/ void elcr_write_trigger(u_int irq, enum intr_trigger trigger) { int new_status; KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); KASSERT(irq <= 15, ("%s: invalid IRQ %u", __func__, irq)); if (trigger == INTR_TRIGGER_LEVEL) new_status = elcr_status | ELCR_MASK(irq); else new_status = elcr_status & ~ELCR_MASK(irq); if (new_status == elcr_status) return; elcr_status = new_status; if (irq >= 8) outb(ELCR_PORT + 1, elcr_status >> 8); else outb(ELCR_PORT, elcr_status & 0xff); } void elcr_resume(void) { KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); outb(ELCR_PORT, elcr_status & 0xff); outb(ELCR_PORT + 1, elcr_status >> 8); } Index: head/sys/x86/x86/intr_machdep.c =================================================================== --- head/sys/x86/x86/intr_machdep.c (revision 344854) +++ head/sys/x86/x86/intr_machdep.c (revision 344855) @@ -1,848 +1,847 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Machine dependent interrupt code for x86. For x86, we have to * deal with different PICs. Thus, we use the passed in vector to lookup * an interrupt source associated with that vector. The interrupt source * describes which PIC the source belongs to and includes methods to handle * that source. */ #include "opt_atpic.h" #include "opt_ddb.h" #include "opt_smp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifndef DEV_ATPIC #include #include #include #include #include #endif #include #define MAX_STRAY_LOG 5 typedef void (*mask_fn)(void *); static int intrcnt_index; static struct intsrc **interrupt_sources; #ifdef SMP static struct intsrc **interrupt_sorted; static int intrbalance; SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, "Interrupt auto-balance interval (seconds). 
Zero disables."); static struct timeout_task intrbalance_task; #endif static struct sx intrsrc_lock; static struct mtx intrpic_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; u_int num_io_irqs; #if defined(SMP) && !defined(EARLY_AP_STARTUP) static int assign_cpu; #endif u_long *intrcnt; char *intrnames; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); int nintrcnt; static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources"); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); static void intr_init(void *__dummy); static int intr_pic_registered(struct pic *pic); static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); /* * SYSINIT levels for SI_SUB_INTR: * * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init * SI_ORDER_SECOND: Xen PICs * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges * SI_ORDER_FOURTH: Add 8259A PICs * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources * SI_ORDER_MIDDLE: SMP interrupt counters * SI_ORDER_ANY: Enable interrupts on BSP */ static int intr_pic_registered(struct pic *pic) { struct pic *p; TAILQ_FOREACH(p, &pics, pics) { if (p == pic) return (1); } return (0); } /* * Register a new interrupt controller (PIC). This is to support suspend * and resume where we suspend/resume controllers rather than individual * sources. This also allows controllers with no active sources (such as * 8259As in a system using the APICs) to participate in suspend and resume. */ int intr_register_pic(struct pic *pic) { int error; mtx_lock(&intrpic_lock); if (intr_pic_registered(pic)) error = EBUSY; else { TAILQ_INSERT_TAIL(&pics, pic, pics); error = 0; } mtx_unlock(&intrpic_lock); return (error); } /* * Allocate interrupt source arrays and register interrupt sources * once the number of interrupts is known. */ static void intr_init_sources(void *arg) { struct pic *pic; MPASS(num_io_irqs > 0); interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources), M_INTR, M_WAITOK | M_ZERO); #ifdef SMP interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted), M_INTR, M_WAITOK | M_ZERO); #endif /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 1 counter for each CPU for the Hyper-V vmbus driver. * - 8 counters for each CPU for IPI counters for SMP. */ nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2; #ifdef COUNT_IPIS if (mp_ncpus > 1) nintrcnt += 8 * mp_ncpus; #endif intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK | M_ZERO); intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK | M_ZERO); sintrcnt = nintrcnt * sizeof(u_long); sintrnames = nintrcnt * (MAXCOMLEN + 1); intrcnt_setname("???", 0); intrcnt_index = 1; /* * NB: intrpic_lock is not held here to avoid LORs due to * malloc() in intr_register_source(). However, we are still * single-threaded at this point in startup so the list of * PICs shouldn't change. */ TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_register_sources != NULL) pic->pic_register_sources(pic); } } SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources, NULL); /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is * called. 
*/ int intr_register_source(struct intsrc *isrc) { int error, vector; KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector, num_io_irqs)); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source, (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:", vector); if (error) return (error); sx_xlock(&intrsrc_lock); if (interrupt_sources[vector] != NULL) { sx_xunlock(&intrsrc_lock); intr_event_destroy(isrc->is_event); return (EEXIST); } intrcnt_register(isrc); interrupt_sources[vector] = isrc; isrc->is_handlers = 0; sx_xunlock(&intrsrc_lock); return (0); } struct intsrc * intr_lookup_source(int vector) { if (vector < 0 || vector >= num_io_irqs) return (NULL); return (interrupt_sources[vector]); } int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, int domain) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_add_handler(isrc->is_event, name, filter, handler, arg, intr_priority(flags), flags, cookiep); if (error == 0) { sx_xlock(&intrsrc_lock); intrcnt_updatename(isrc); isrc->is_handlers++; if (isrc->is_handlers == 1) { isrc->is_domain = domain; isrc->is_pic->pic_enable_intr(isrc); isrc->is_pic->pic_enable_source(isrc); } sx_xunlock(&intrsrc_lock); } return (error); } int intr_remove_handler(void *cookie) { struct intsrc *isrc; int error; isrc = intr_handler_source(cookie); error = intr_event_remove_handler(cookie); if (error == 0) { sx_xlock(&intrsrc_lock); isrc->is_handlers--; if (isrc->is_handlers == 0) { isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); isrc->is_pic->pic_disable_intr(isrc); } intrcnt_updatename(isrc); sx_xunlock(&intrsrc_lock); } return (error); } int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); } static void intr_disable_src(void *arg) { struct intsrc *isrc; isrc = arg; isrc->is_pic->pic_disable_source(isrc, PIC_EOI); } void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) { struct intr_event *ie; int vector; /* * We count software interrupts when we process them. The * code here follows previous practice, but there's an * argument for counting hardware interrupts when they're * processed too. */ (*isrc->is_count)++; VM_CNT_INC(v_intr); ie = isrc->is_event; /* * XXX: We assume that IRQ 0 is only used for the ISA timer * device (clk). */ vector = isrc->is_pic->pic_vector(isrc); if (vector == 0) clkintr_pending = 1; /* * For stray interrupts, mask and EOI the source, bump the * stray count, and log the condition. 
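 */

/*
 * [Editor's sketch -- not part of this change.] The stray-interrupt code
 * that follows logs each stray up to MAX_STRAY_LOG times, then emits one
 * final "not logging anymore" message while continuing to count.  A
 * minimal userland model of that rate-limiting pattern; the model_*
 * names are hypothetical.
 */
#include <stdio.h>

#define MODEL_MAX_STRAY_LOG     5

static void
model_stray(int irq, unsigned long *straycount)
{
        (*straycount)++;
        if (*straycount < MODEL_MAX_STRAY_LOG)
                printf("stray irq%d\n", irq);
        else if (*straycount == MODEL_MAX_STRAY_LOG)
                printf("too many stray irq %d's: not logging anymore\n", irq);
        /* Strays past the threshold are still counted, just not logged. */
}

int
main(void)
{
        unsigned long count = 0;

        for (int i = 0; i < 8; i++)     /* prints 4 strays + 1 final line */
                model_stray(7, &count);
        return (0);
}

/*
 * (End of editorial sketch.)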
*/ if (intr_event_handle(ie, frame) != 0) { isrc->is_pic->pic_disable_source(isrc, PIC_EOI); (*isrc->is_straycount)++; if (*isrc->is_straycount < MAX_STRAY_LOG) log(LOG_ERR, "stray irq%d\n", vector); else if (*isrc->is_straycount == MAX_STRAY_LOG) log(LOG_CRIT, "too many stray irq %d's: not logging anymore\n", vector); } } void intr_resume(bool suspend_cancelled) { struct pic *pic; #ifndef DEV_ATPIC atpic_reset(); #endif mtx_lock(&intrpic_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intrpic_lock); } void intr_suspend(void) { struct pic *pic; mtx_lock(&intrpic_lock); TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } mtx_unlock(&intrpic_lock); } static int intr_assign_cpu(void *arg, int cpu) { #ifdef SMP struct intsrc *isrc; int error; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); /* Nothing to do if there is only a single CPU. */ if (mp_ncpus > 1 && cpu != NOCPU) { #else /* * Don't do anything during early boot. We will pick up the * assignment once the APs are started. */ if (assign_cpu && cpu != NOCPU) { #endif isrc = arg; sx_xlock(&intrsrc_lock); error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); if (error == 0) isrc->is_cpu = cpu; sx_xunlock(&intrsrc_lock); } else error = 0; return (error); #else return (EOPNOTSUPP); #endif } static void intrcnt_setname(const char *name, int index) { snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void intrcnt_updatename(struct intsrc *is) { intrcnt_setname(is->is_event->ie_fullname, is->is_index); } static void intrcnt_register(struct intsrc *is) { char straystr[MAXCOMLEN + 1]; KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); MPASS(intrcnt_index + 2 <= nintrcnt); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", is->is_pic->pic_vector(is)); intrcnt_updatename(is); is->is_count = &intrcnt[is->is_index]; intrcnt_setname(straystr, is->is_index + 1); is->is_straycount = &intrcnt[is->is_index + 1]; mtx_unlock_spin(&intrcnt_lock); } void intrcnt_add(const char *name, u_long **countp) { mtx_lock_spin(&intrcnt_lock); MPASS(intrcnt_index < nintrcnt); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; mtx_unlock_spin(&intrcnt_lock); } static void intr_init(void *dummy __unused) { TAILQ_INIT(&pics); mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); sx_init(&intrsrc_lock, "intrsrc"); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); static void intr_init_final(void *dummy __unused) { /* * Enable interrupts on the BSP after all of the interrupt * controllers are initialized. Device interrupts are still * disabled in the interrupt controllers until interrupt * handlers are registered. Interrupts are enabled on each AP * after their first context switch. */ enable_intr(); } SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL); #ifndef DEV_ATPIC /* Initialize the two 8259A's to a known-good shutdown state. 
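 */

/*
 * [Editor's sketch -- not part of this change.] intrcnt_setname() above
 * stores interrupt names in one flat array of fixed-width slots, so that
 * consumers such as vmstat -i can find name i at offset
 * i * (MAXCOMLEN + 1).  A self-contained model of that layout, using 19
 * for MAXCOMLEN as on FreeBSD; the model_* names are hypothetical.
 */
#include <stdio.h>

#define MODEL_NAMELEN   19              /* stands in for MAXCOMLEN */

static char model_names[8 * (MODEL_NAMELEN + 1)];

static void
model_setname(const char *name, int index)
{
        /* "%-*s" left-justifies and pads; snprintf NUL-terminates the slot. */
        snprintf(model_names + (MODEL_NAMELEN + 1) * index,
            MODEL_NAMELEN + 1, "%-*s", MODEL_NAMELEN, name);
}

int
main(void)
{
        model_setname("irq9: acpi0", 0);
        model_setname("stray irq9", 1);
        printf("[%s]\n", model_names + (MODEL_NAMELEN + 1) * 1);
        return (0);
}

/*
 * (End of editorial sketch; atpic_reset() follows.)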
*/ void atpic_reset(void) { outb(IO_ICU1, ICW1_RESET | ICW1_IC4); outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS); outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID)); outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE); outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU1, OCW3_SEL | OCW3_RR); outb(IO_ICU2, ICW1_RESET | ICW1_IC4); outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8); outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID); outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE); outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU2, OCW3_SEL | OCW3_RR); } #endif /* Add a description to an active interrupt handler. */ int intr_describe(u_int vector, void *ih, const char *descr) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_describe_handler(isrc->is_event, ih, descr); if (error) return (error); intrcnt_updatename(isrc); return (0); } void intr_reprogram(void) { struct intsrc *is; u_int v; sx_xlock(&intrsrc_lock); for (v = 0; v < num_io_irqs; v++) { is = interrupt_sources[v]; if (is == NULL) continue; if (is->is_pic->pic_reprogram_pin != NULL) is->is_pic->pic_reprogram_pin(is); } sx_xunlock(&intrsrc_lock); } #ifdef DDB /* * Dump data about interrupt handlers */ DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; u_int i; int verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } #endif #ifdef SMP /* * Support for balancing interrupt sources across CPUs. For now we just * allocate CPUs round-robin. */ cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); static int current_cpu[MAXMEMDOM]; static void intr_init_cpus(void) { int i; for (i = 0; i < vm_ndomains; i++) { current_cpu[i] = 0; if (!CPU_ISSET(current_cpu[i], &intr_cpus) || !CPU_ISSET(current_cpu[i], &cpuset_domain[i])) intr_next_cpu(i); } } /* * Return the CPU that the next interrupt source should use. For now * this just returns the next local APIC according to round-robin. */ u_int intr_next_cpu(int domain) { u_int apic_id; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); if (mp_ncpus == 1) return (PCPU_GET(apic_id)); #else /* Leave all interrupts on the BSP during boot. */ if (!assign_cpu) return (PCPU_GET(apic_id)); #endif mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu[domain]]; do { current_cpu[domain]++; if (current_cpu[domain] > mp_maxid) current_cpu[domain] = 0; } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) || !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain])); mtx_unlock_spin(&icu_lock); return (apic_id); } /* Attempt to bind the specified IRQ to the specified CPU. */ int intr_bind(u_int vector, u_char cpu) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (intr_event_bind(isrc->is_event, cpu)); } /* * Add a CPU to our mask of valid CPUs that can be destinations of * interrupts. */ void intr_add_cpu(u_int cpu) { if (cpu >= MAXCPU) panic("%s: Invalid CPU ID", __func__); if (bootverbose) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); CPU_SET(cpu, &intr_cpus); } #ifdef EARLY_AP_STARTUP static void intr_smp_startup(void *arg __unused) { intr_init_cpus(); return; } SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup, NULL); #else /* * Distribute all the interrupt sources among the available CPUs once the * AP's have been launched. 
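 */

/*
 * [Editor's sketch -- not part of this change.] intr_next_cpu() above
 * hands out CPUs round-robin, returning the current pick and then
 * advancing with wraparound past CPUs that are not in the permitted set.
 * A minimal model using a bitmask in place of cpuset_t; it assumes the
 * set is non-empty, and the model_* names are hypothetical.
 */
#include <assert.h>

static int model_current_cpu;

static int
model_next_cpu(unsigned intr_cpus, int maxid)
{
        int picked = model_current_cpu;

        do {
                model_current_cpu++;
                if (model_current_cpu > maxid)
                        model_current_cpu = 0;
        } while ((intr_cpus & (1u << model_current_cpu)) == 0);
        return (picked);
}

int
main(void)
{
        /* CPUs 0, 2 and 3 accept interrupts; CPU 1 is always skipped. */
        assert(model_next_cpu(0x0d, 3) == 0);
        assert(model_next_cpu(0x0d, 3) == 2);
        assert(model_next_cpu(0x0d, 3) == 3);
        assert(model_next_cpu(0x0d, 3) == 0);
        return (0);
}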
*/ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; u_int cpu, i; intr_init_cpus(); /* Don't bother on UP. */ if (mp_ncpus == 1) return; /* Round-robin assign a CPU to each enabled source. */ sx_xlock(&intrsrc_lock); assign_cpu = 1; for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* * If this event is already bound to a CPU, * then assign the source to that CPU instead * of picking one via round-robin. Note that * this is careful to only advance the * round-robin if the CPU assignment succeeds. */ cpu = isrc->is_event->ie_cpu; if (cpu == NOCPU) cpu = current_cpu[isrc->is_domain]; if (isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) { isrc->is_cpu = cpu; if (isrc->is_event->ie_cpu == NOCPU) intr_next_cpu(isrc->is_domain); } } } sx_xunlock(&intrsrc_lock); } SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL); #endif /* * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. */ static int sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct intsrc *isrc; u_int i; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sx_slock(&intrsrc_lock); for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc == NULL) continue; sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n", isrc->is_event->ie_fullname, isrc->is_index, isrc->is_cpu, isrc->is_domain, *isrc->is_count); } sx_sunlock(&intrsrc_lock); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_PROC(_hw, OID_AUTO, intrs, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_hw_intrs, "A", "interrupt:number @cpu: count"); /* * Compare two, possibly NULL, entries in the interrupt source array * by load. */ static int intrcmp(const void *one, const void *two) { const struct intsrc *i1, *i2; i1 = *(const struct intsrc * const *)one; i2 = *(const struct intsrc * const *)two; if (i1 != NULL && i2 != NULL) return (*i1->is_count - *i2->is_count); if (i1 != NULL) return (1); if (i2 != NULL) return (-1); return (0); } /* * Balance IRQs across available CPUs according to load. */ static void intr_balance(void *dummy __unused, int pending __unused) { struct intsrc *isrc; int interval; u_int cpu; int i; interval = intrbalance; if (interval == 0) goto out; /* * Sort interrupts according to count. */ sx_xlock(&intrsrc_lock); memcpy(interrupt_sorted, interrupt_sources, num_io_irqs * sizeof(interrupt_sorted[0])); qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]), intrcmp); /* * Restart the scan from the same location to avoid moving in the * common case. */ intr_init_cpus(); /* * Assign round-robin from most loaded to least. */ for (i = num_io_irqs - 1; i >= 0; i--) { isrc = interrupt_sorted[i]; if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) continue; cpu = current_cpu[isrc->is_domain]; intr_next_cpu(isrc->is_domain); if (isrc->is_cpu != cpu && isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) isrc->is_cpu = cpu; } sx_xunlock(&intrsrc_lock); out: taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, interval ? 
hz * interval : hz * 60); } static void intr_balance_init(void *dummy __unused) { TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance, NULL); taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz); } SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL); #else /* * Always route interrupts to the current processor in the UP case. */ u_int intr_next_cpu(int domain) { return (PCPU_GET(apic_id)); } #endif Index: head/sys/x86/x86/io_apic.c =================================================================== --- head/sys/x86/x86/io_apic.c (revision 344854) +++ head/sys/x86/x86/io_apic.c (revision 344855) @@ -1,1252 +1,1251 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IOAPIC_ISA_INTS 16 #define IOAPIC_MEM_REGION 32 #define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2) #define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1) static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures"); /* * I/O APIC interrupt source driver. Each pin is assigned an IRQ cookie * as laid out in the ACPI System Interrupt number model where each I/O * APIC has a contiguous chunk of the System Interrupt address space. * We assume that IRQs 1 - 15 behave like ISA IRQs and that all other * IRQs behave as PCI IRQs by default. We also assume that the pin for * IRQ 0 is actually an ExtINT pin. The apic enumerators override the * configuration of individual pins as indicated by their tables. * * Documentation for the I/O APIC: "82093AA I/O Advanced Programmable * Interrupt Controller (IOAPIC)", May 1996, Intel Corp. 
* ftp://download.intel.com/design/chipsets/datashts/29056601.pdf */ struct ioapic_intsrc { struct intsrc io_intsrc; int io_irq; u_int io_intpin:8; u_int io_vector:8; u_int io_cpu; u_int io_activehi:1; u_int io_edgetrigger:1; u_int io_masked:1; int io_bus:4; uint32_t io_lowreg; u_int io_remap_cookie; }; struct ioapic { struct pic io_pic; u_int io_id:8; /* logical ID */ u_int io_apic_id:4; u_int io_intbase:8; /* System Interrupt base */ u_int io_numintr:8; u_int io_haseoi:1; volatile ioapic_t *io_addr; /* XXX: should use bus_space */ vm_paddr_t io_paddr; STAILQ_ENTRY(ioapic) io_next; device_t pci_dev; /* matched pci device, if found */ struct resource *pci_wnd; /* BAR 0, should be same or alias to io_paddr */ struct ioapic_intsrc io_pins[0]; }; static u_int ioapic_read(volatile ioapic_t *apic, int reg); static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); static const char *ioapic_bus_string(int bus_type); static void ioapic_print_irq(struct ioapic_intsrc *intpin); static void ioapic_register_sources(struct pic *pic); static void ioapic_enable_source(struct intsrc *isrc); static void ioapic_disable_source(struct intsrc *isrc, int eoi); static void ioapic_eoi_source(struct intsrc *isrc); static void ioapic_enable_intr(struct intsrc *isrc); static void ioapic_disable_intr(struct intsrc *isrc); static int ioapic_vector(struct intsrc *isrc); static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_resume(struct pic *pic, bool suspend_cancelled); static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); static void ioapic_reprogram_intpin(struct intsrc *isrc); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); struct pic ioapic_template = { .pic_register_sources = ioapic_register_sources, .pic_enable_source = ioapic_enable_source, .pic_disable_source = ioapic_disable_source, .pic_eoi_source = ioapic_eoi_source, .pic_enable_intr = ioapic_enable_intr, .pic_disable_intr = ioapic_disable_intr, .pic_vector = ioapic_vector, .pic_source_pending = ioapic_source_pending, .pic_suspend = NULL, .pic_resume = ioapic_resume, .pic_config_intr = ioapic_config_intr, .pic_assign_cpu = ioapic_assign_cpu, .pic_reprogram_pin = ioapic_reprogram_intpin, }; static u_int next_ioapic_base; static u_int next_id; static int enable_extint; SYSCTL_INT(_hw_apic, OID_AUTO, enable_extint, CTLFLAG_RDTUN, &enable_extint, 0, "Enable the ExtINT pin in the first I/O APIC"); static void _ioapic_eoi_source(struct intsrc *isrc, int locked) { struct ioapic_intsrc *src; struct ioapic *io; volatile uint32_t *apic_eoi; uint32_t low1; lapic_eoi(); if (!lapic_eoi_suppression) return; src = (struct ioapic_intsrc *)isrc; if (src->io_edgetrigger) return; io = (struct ioapic *)isrc->is_pic; /* * Handle targeted EOI for level-triggered pins, if broadcast * EOI suppression is supported by LAPICs. */ if (io->io_haseoi) { /* * If IOAPIC has EOI Register, simply write vector * number into the reg. */ apic_eoi = (volatile uint32_t *)((volatile char *) io->io_addr + IOAPIC_EOIR); *apic_eoi = src->io_vector; } else { /* * Otherwise, if IO-APIC is too old to provide EOIR, * do what Intel did for the Linux kernel. Temporary * switch the pin to edge-trigger and back, masking * the pin during the trick. 
*/ if (!locked) mtx_lock_spin(&icu_lock); low1 = src->io_lowreg; low1 &= ~IOART_TRGRLVL; low1 |= IOART_TRGREDG | IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin), low1); ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin), src->io_lowreg); if (!locked) mtx_unlock_spin(&icu_lock); } } static u_int ioapic_read(volatile ioapic_t *apic, int reg) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; return (apic->iowin); } static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; apic->iowin = val; } static const char * ioapic_bus_string(int bus_type) { switch (bus_type) { case APIC_BUS_ISA: return ("ISA"); case APIC_BUS_EISA: return ("EISA"); case APIC_BUS_PCI: return ("PCI"); default: return ("unknown"); } } static void ioapic_print_irq(struct ioapic_intsrc *intpin) { switch (intpin->io_irq) { case IRQ_DISABLED: printf("disabled"); break; case IRQ_EXTINT: printf("ExtINT"); break; case IRQ_NMI: printf("NMI"); break; case IRQ_SMI: printf("SMI"); break; default: printf("%s IRQ %d", ioapic_bus_string(intpin->io_bus), intpin->io_irq); } } static void ioapic_enable_source(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (intpin->io_masked) { flags = intpin->io_lowreg & ~IOART_INTMASK; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 0; } mtx_unlock_spin(&icu_lock); } static void ioapic_disable_source(struct intsrc *isrc, int eoi) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { flags = intpin->io_lowreg | IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 1; } if (eoi == PIC_EOI) _ioapic_eoi_source(isrc, 1); mtx_unlock_spin(&icu_lock); } static void ioapic_eoi_source(struct intsrc *isrc) { _ioapic_eoi_source(isrc, 0); } /* * Completely program an intpin based on the data in its interrupt source * structure. */ static void ioapic_program_intpin(struct ioapic_intsrc *intpin) { struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; uint32_t low, high; #ifdef ACPI_DMAR int error; #endif /* * If a pin is completely invalid or if it is valid but hasn't * been enabled yet, just ensure that the pin is masked. */ mtx_assert(&icu_lock, MA_OWNED); if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq >= 0 && intpin->io_vector == 0)) { low = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); if ((low & IOART_INTMASK) == IOART_INTMCLR) ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low | IOART_INTMSET); #ifdef ACPI_DMAR mtx_unlock_spin(&icu_lock); iommu_unmap_ioapic_intr(io->io_apic_id, &intpin->io_remap_cookie); mtx_lock_spin(&icu_lock); #endif return; } #ifdef ACPI_DMAR mtx_unlock_spin(&icu_lock); error = iommu_map_ioapic_intr(io->io_apic_id, intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger, intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie, &high, &low); mtx_lock_spin(&icu_lock); if (error == 0) { ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), high); intpin->io_lowreg = low; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); return; } else if (error != EOPNOTSUPP) { return; } #endif /* * Set the destination. 
Note that with Intel interrupt remapping, * the previously reserved bits 55:48 now have a purpose so ensure * these are zero. */ low = IOART_DESTPHY; high = intpin->io_cpu << APIC_ID_SHIFT; /* Program the rest of the low word. */ if (intpin->io_edgetrigger) low |= IOART_TRGREDG; else low |= IOART_TRGRLVL; if (intpin->io_activehi) low |= IOART_INTAHI; else low |= IOART_INTALO; if (intpin->io_masked) low |= IOART_INTMSET; switch (intpin->io_irq) { case IRQ_EXTINT: KASSERT(intpin->io_edgetrigger, ("ExtINT not edge triggered")); low |= IOART_DELEXINT; break; case IRQ_NMI: KASSERT(intpin->io_edgetrigger, ("NMI not edge triggered")); low |= IOART_DELNMI; break; case IRQ_SMI: KASSERT(intpin->io_edgetrigger, ("SMI not edge triggered")); low |= IOART_DELSMI; break; default: KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u", intpin->io_irq)); low |= IOART_DELFIXED | intpin->io_vector; } /* Write the values to the APIC. */ ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), high); intpin->io_lowreg = low; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); } static void ioapic_reprogram_intpin(struct intsrc *isrc) { mtx_lock_spin(&icu_lock); ioapic_program_intpin((struct ioapic_intsrc *)isrc); mtx_unlock_spin(&icu_lock); } static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; u_int old_vector, new_vector; u_int old_id; /* * On Hyper-V: * - Stick to the first cpu for all I/O APIC pins. * - And don't allow destination cpu changes. */ if (vm_guest == VM_GUEST_HV) { if (intpin->io_vector) return (EINVAL); else apic_id = 0; } /* * keep 1st core as the destination for NMI */ if (intpin->io_irq == IRQ_NMI) apic_id = 0; /* * Set us up to free the old irq. */ old_vector = intpin->io_vector; old_id = intpin->io_cpu; if (old_vector && apic_id == old_id) return (0); /* * Allocate an APIC vector for this interrupt pin. Once * we have a vector we program the interrupt pin. */ new_vector = apic_alloc_vector(apic_id, intpin->io_irq); if (new_vector == 0) return (ENOSPC); /* * Mask the old intpin if it is enabled while it is migrated. * * At least some level-triggered interrupts seem to need the * extra DELAY() to avoid being stuck in a non-EOI'd state. */ mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), intpin->io_lowreg | IOART_INTMSET); mtx_unlock_spin(&icu_lock); DELAY(100); mtx_lock_spin(&icu_lock); } intpin->io_cpu = apic_id; intpin->io_vector = new_vector; if (isrc->is_handlers > 0) apic_enable_vector(intpin->io_cpu, intpin->io_vector); if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); ioapic_print_irq(intpin); printf(") to lapic %u vector %u\n", intpin->io_cpu, intpin->io_vector); } ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); /* * Free the old vector after the new one is established. This is done * to prevent races where we could miss an interrupt. 
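 */

/*
 * [Editor's sketch -- not part of this change.] The ordering in
 * ioapic_assign_cpu() here is: allocate the new vector, reprogram the
 * pin, and only then release the old vector, so there is never a window
 * in which the pin points at a vector that has already been freed.  A
 * schematic userland model of that ordering; the model_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stdbool.h>

static bool model_vector_live[256];
static int model_pin_vector;    /* vector the "hardware" would deliver */

static void
model_migrate(int new_vec)
{
        int old_vec = model_pin_vector;

        model_vector_live[new_vec] = true;      /* 1: new vector valid */
        model_pin_vector = new_vec;             /* 2: reprogram the pin */
        if (old_vec != 0)
                model_vector_live[old_vec] = false; /* 3: now drop old */
}

int
main(void)
{
        model_migrate(0x60);
        assert(model_pin_vector == 0x60 && model_vector_live[0x60]);
        model_migrate(0x61);
        /* At every step the pin pointed at a live vector. */
        assert(!model_vector_live[0x60] && model_vector_live[0x61]);
        return (0);
}

/*
 * (End of editorial sketch.)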
*/ if (old_vector) { if (isrc->is_handlers > 0) apic_disable_vector(old_id, old_vector); apic_free_vector(old_id, old_vector, intpin->io_irq); } return (0); } static void ioapic_enable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) if (ioapic_assign_cpu(isrc, intr_next_cpu(isrc->is_domain)) != 0) panic("Couldn't find an APIC vector for IRQ %d", intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); } static void ioapic_disable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; u_int vector; if (intpin->io_vector != 0) { /* Mask this interrupt pin and free its APIC vector. */ vector = intpin->io_vector; apic_disable_vector(intpin->io_cpu, vector); mtx_lock_spin(&icu_lock); intpin->io_masked = 1; intpin->io_vector = 0; ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); apic_free_vector(intpin->io_cpu, vector, intpin->io_irq); } } static int ioapic_vector(struct intsrc *isrc) { struct ioapic_intsrc *pin; pin = (struct ioapic_intsrc *)isrc; return (pin->io_irq); } static int ioapic_source_pending(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) return 0; return (lapic_intr_pending(intpin->io_vector)); } static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; int changed; KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), ("%s: Conforming trigger or polarity\n", __func__)); /* * EISA interrupts always use active high polarity, so don't allow * them to be set to active low. * * XXX: Should we write to the ELCR if the trigger mode changes for * an EISA IRQ or an ISA IRQ with the ELCR present? */ mtx_lock_spin(&icu_lock); if (intpin->io_bus == APIC_BUS_EISA) pol = INTR_POLARITY_HIGH; changed = 0; if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) { if (bootverbose) printf("ioapic%u: Changing trigger for pin %u to %s\n", io->io_id, intpin->io_intpin, trig == INTR_TRIGGER_EDGE ? "edge" : "level"); intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE); changed++; } if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) { if (bootverbose) printf("ioapic%u: Changing polarity for pin %u to %s\n", io->io_id, intpin->io_intpin, pol == INTR_POLARITY_HIGH ? "high" : "low"); intpin->io_activehi = (pol == INTR_POLARITY_HIGH); changed++; } if (changed) ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); return (0); } static void ioapic_resume(struct pic *pic, bool suspend_cancelled) { struct ioapic *io = (struct ioapic *)pic; int i; mtx_lock_spin(&icu_lock); for (i = 0; i < io->io_numintr; i++) ioapic_program_intpin(&io->io_pins[i]); mtx_unlock_spin(&icu_lock); } /* * Create a plain I/O APIC object. */ void * ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) { struct ioapic *io; struct ioapic_intsrc *intpin; volatile ioapic_t *apic; u_int numintr, i; uint32_t value; /* Map the register window so we can access the device. */ apic = pmap_mapdev(addr, IOAPIC_MEM_REGION); mtx_lock_spin(&icu_lock); value = ioapic_read(apic, IOAPIC_VER); mtx_unlock_spin(&icu_lock); /* If it's version register doesn't seem to work, punt. */ if (value == 0xffffffff) { pmap_unmapdev((vm_offset_t)apic, IOAPIC_MEM_REGION); return (NULL); } /* Determine the number of vectors and set the APIC ID. 
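 */

/*
 * [Editor's sketch -- not part of this change.] struct ioapic above ends
 * with a zero-length io_pins[] array, and the allocation just below sizes
 * the header and all per-pin slots in a single malloc().  The same idiom
 * with a C99 flexible array member; the model_* names are hypothetical.
 */
#include <stdlib.h>

struct model_pin {
        unsigned irq;
};

struct model_apic {
        unsigned numintr;
        struct model_pin pins[];        /* flexible array member */
};

static struct model_apic *
model_apic_create(unsigned numintr)
{
        struct model_apic *io;

        /* One allocation covers the header plus numintr pin slots. */
        io = malloc(sizeof(*io) + numintr * sizeof(io->pins[0]));
        if (io == NULL)
                return (NULL);
        io->numintr = numintr;
        for (unsigned i = 0; i < numintr; i++)
                io->pins[i].irq = i;
        return (io);
}

int
main(void)
{
        free(model_apic_create(24));
        return (0);
}

/*
 * (End of editorial sketch.)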
*/ numintr = ((value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) + 1; io = malloc(sizeof(struct ioapic) + numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK); io->io_pic = ioapic_template; io->pci_dev = NULL; io->pci_wnd = NULL; mtx_lock_spin(&icu_lock); io->io_id = next_id++; io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); io->io_apic_id = apic_id; printf("ioapic%u: Changing APIC ID to %d\n", io->io_id, apic_id); } else mtx_unlock_spin(&icu_lock); if (intbase == -1) { intbase = next_ioapic_base; printf("ioapic%u: Assuming intbase of %d\n", io->io_id, intbase); } else if (intbase != next_ioapic_base && bootverbose) printf("ioapic%u: WARNING: intbase %d != expected base %d\n", io->io_id, intbase, next_ioapic_base); io->io_intbase = intbase; next_ioapic_base = intbase + numintr; if (next_ioapic_base > num_io_irqs) num_io_irqs = next_ioapic_base; io->io_numintr = numintr; io->io_addr = apic; io->io_paddr = addr; if (bootverbose) { printf("ioapic%u: ver 0x%02x maxredir 0x%02x\n", io->io_id, (value & IOART_VER_VERSION), (value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT); } /* * The summary information about IO-APIC versions is taken from * the Linux kernel source: * 0Xh 82489DX * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant * 2Xh I/O(x)APIC which is PCI 2.2 Compliant * 30h-FFh Reserved * IO-APICs with version >= 0x20 have working EOIR register. */ io->io_haseoi = (value & IOART_VER_VERSION) >= 0x20; /* * Initialize pins. Start off with interrupts disabled. Default * to active-hi and edge-triggered for ISA interrupts and active-lo * and level-triggered for all others. */ bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr); mtx_lock_spin(&icu_lock); for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) { intpin->io_intsrc.is_pic = (struct pic *)io; intpin->io_intpin = i; intpin->io_irq = intbase + i; /* * Assume that pin 0 on the first I/O APIC is an ExtINT pin. * Assume that pins 1-15 are ISA interrupts and that all * other pins are PCI interrupts. */ if (intpin->io_irq == 0) ioapic_set_extint(io, i); else if (intpin->io_irq < IOAPIC_ISA_INTS) { intpin->io_bus = APIC_BUS_ISA; intpin->io_activehi = 1; intpin->io_edgetrigger = 1; intpin->io_masked = 1; } else { intpin->io_bus = APIC_BUS_PCI; intpin->io_activehi = 0; intpin->io_edgetrigger = 0; intpin->io_masked = 1; } /* * Route interrupts to the BSP by default. Interrupts may * be routed to other CPUs later after they are enabled. 
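 */

/*
 * [Editor's sketch -- not part of this change.] The pin-initialization
 * loop here masks each pin by reading the low redirection-table word and
 * writing it back with the mask bit set, preserving whatever the firmware
 * left in the other fields.  A model of that read-modify-write, using
 * bit 16 for the mask as in the 82093AA datasheet; the model_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stdint.h>

#define MODEL_INTMSET   (1u << 16)      /* interrupt mask bit */

static uint32_t model_redtbl_lo[24];    /* simulated register file */

static void
model_mask_pin(int pin)
{
        uint32_t value;

        value = model_redtbl_lo[pin];                   /* read */
        model_redtbl_lo[pin] = value | MODEL_INTMSET;   /* modify, write */
}

int
main(void)
{
        model_redtbl_lo[3] = 0x0000a970;        /* arbitrary firmware state */
        model_mask_pin(3);
        assert(model_redtbl_lo[3] == (0x0000a970 | MODEL_INTMSET));
        return (0);
}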
*/ intpin->io_cpu = PCPU_GET(apic_id); value = ioapic_read(apic, IOAPIC_REDTBL_LO(i)); ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET); #ifdef ACPI_DMAR /* dummy, but sets cookie */ mtx_unlock_spin(&icu_lock); iommu_map_ioapic_intr(io->io_apic_id, intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger, intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie, NULL, NULL); mtx_lock_spin(&icu_lock); #endif } mtx_unlock_spin(&icu_lock); return (io); } int ioapic_get_vector(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (-1); return (io->io_pins[pin].io_irq); } int ioapic_disable_pin(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_DISABLED) return (EINVAL); io->io_pins[pin].io_irq = IRQ_DISABLED; if (bootverbose) printf("ioapic%u: intpin %d disabled\n", io->io_id, pin); return (0); } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || vector < 0) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_irq = vector; if (bootverbose) printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id, vector, pin); return (0); } int ioapic_set_bus(void *cookie, u_int pin, int bus_type) { struct ioapic *io; if (bus_type < 0 || bus_type > APIC_BUS_MAX) return (EINVAL); io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); if (io->io_pins[pin].io_bus == bus_type) return (0); io->io_pins[pin].io_bus = bus_type; if (bootverbose) printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin, ioapic_bus_string(bus_type)); return (0); } int ioapic_set_nmi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_NMI) return (0); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_NMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing NMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_smi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_SMI) return (0); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_SMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing SMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_extint(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_EXTINT) return (0); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_EXTINT; if (enable_extint) io->io_pins[pin].io_masked = 0; else io->io_pins[pin].io_masked = 1; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing external 8259A's -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) { struct ioapic *io; int activehi; 
io = (struct ioapic *)cookie; if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); activehi = (pol == INTR_POLARITY_HIGH); if (io->io_pins[pin].io_activehi == activehi) return (0); io->io_pins[pin].io_activehi = activehi; if (bootverbose) printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { struct ioapic *io; int edgetrigger; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (io->io_pins[pin].io_edgetrigger == edgetrigger) return (0); io->io_pins[pin].io_edgetrigger = edgetrigger; if (bootverbose) printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Register a complete I/O APIC object with the interrupt subsystem. */ void ioapic_register(void *cookie) { struct ioapic_intsrc *pin; struct ioapic *io; volatile ioapic_t *apic; uint32_t flags; int i; io = (struct ioapic *)cookie; apic = io->io_addr; mtx_lock_spin(&icu_lock); flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION; STAILQ_INSERT_TAIL(&ioapic_list, io, io_next); mtx_unlock_spin(&icu_lock); printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n", io->io_id, flags >> 4, flags & 0xf, io->io_intbase, io->io_intbase + io->io_numintr - 1); /* * Reprogram pins to handle special case pins (such as NMI and * SMI) and disable normal pins until a handler is registered. */ intr_register_pic(&io->io_pic); for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) ioapic_reprogram_intpin(&pin->io_intsrc); } /* * Add interrupt sources for I/O APIC interrupt pins. */ static void ioapic_register_sources(struct pic *pic) { struct ioapic_intsrc *pin; struct ioapic *io; int i; io = (struct ioapic *)pic; for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { if (pin->io_irq >= 0) intr_register_source(&pin->io_intsrc); } } /* A simple new-bus driver to consume PCI I/O APIC devices. */ static int ioapic_pci_probe(device_t dev) { if (pci_get_class(dev) == PCIC_BASEPERIPH && pci_get_subclass(dev) == PCIS_BASEPERIPH_PIC) { switch (pci_get_progif(dev)) { case PCIP_BASEPERIPH_PIC_IO_APIC: device_set_desc(dev, "IO APIC"); break; case PCIP_BASEPERIPH_PIC_IOX_APIC: device_set_desc(dev, "IO(x) APIC"); break; default: return (ENXIO); } device_quiet(dev); return (-10000); } return (ENXIO); } static int ioapic_pci_attach(device_t dev) { struct resource *res; volatile ioapic_t *apic; struct ioapic *io; int rid; u_int apic_id; /* * Try to match the enumerated ioapic. Match BAR start * against io_paddr. Due to a fear that PCI window is not the * same as the MADT reported io window, but an alias, read the * APIC ID from the mapped BAR and match against it. 
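 */

/*
 * [Editor's sketch -- not part of this change.] ioapic_pci_attach() below
 * matches the PCI device against the MADT-enumerated I/O APICs in two
 * passes: first by physical window address, then by APIC ID as a
 * fallback.  A compact model of that search; the model_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct model_ioapic {
        uint64_t paddr;
        unsigned apic_id;
};

static struct model_ioapic *
model_match(struct model_ioapic *tbl, size_t n, uint64_t bar, unsigned id)
{
        size_t i;

        for (i = 0; i < n; i++)         /* first pass: window address */
                if (tbl[i].paddr == bar)
                        return (&tbl[i]);
        for (i = 0; i < n; i++)         /* second pass: APIC ID */
                if (tbl[i].apic_id == id)
                        return (&tbl[i]);
        return (NULL);
}

int
main(void)
{
        struct model_ioapic tbl[] = {
                { 0xfec00000, 8 }, { 0xfec01000, 9 },
        };

        /* BAR is an alias, so the address pass misses; the ID pass hits. */
        assert(model_match(tbl, 2, 0xfeb00000, 9) == &tbl[1]);
        return (0);
}

/*
 * (End of editorial sketch.)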
*/ rid = PCIR_BAR(0); res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_SHAREABLE); if (res == NULL) { if (bootverbose) device_printf(dev, "cannot activate BAR0\n"); return (ENXIO); } apic = (volatile ioapic_t *)rman_get_virtual(res); if (rman_get_size(res) < IOAPIC_WND_SIZE) { if (bootverbose) device_printf(dev, "BAR0 too small (%jd) for IOAPIC window\n", (uintmax_t)rman_get_size(res)); goto fail; } mtx_lock_spin(&icu_lock); apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; /* First match by io window address */ STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_paddr == (vm_paddr_t)rman_get_start(res)) goto found; } /* Then by apic id */ STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_apic_id == apic_id) goto found; } mtx_unlock_spin(&icu_lock); if (bootverbose) device_printf(dev, "cannot match pci bar apic id %d against MADT\n", apic_id); fail: bus_release_resource(dev, SYS_RES_MEMORY, rid, res); return (ENXIO); found: KASSERT(io->pci_dev == NULL, ("ioapic %d pci_dev not NULL", io->io_id)); KASSERT(io->pci_wnd == NULL, ("ioapic %d pci_wnd not NULL", io->io_id)); io->pci_dev = dev; io->pci_wnd = res; if (bootverbose && (io->io_paddr != (vm_paddr_t)rman_get_start(res) || io->io_apic_id != apic_id)) { device_printf(dev, "pci%d:%d:%d:%d pci BAR0@%jx id %d " "MADT id %d paddr@%jx\n", pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), (uintmax_t)rman_get_start(res), apic_id, io->io_apic_id, (uintmax_t)io->io_paddr); } mtx_unlock_spin(&icu_lock); return (0); } static device_method_t ioapic_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioapic_pci_probe), DEVMETHOD(device_attach, ioapic_pci_attach), { 0, 0 } }; DEFINE_CLASS_0(ioapic, ioapic_pci_driver, ioapic_pci_methods, 0); static devclass_t ioapic_devclass; DRIVER_MODULE(ioapic, pci, ioapic_pci_driver, ioapic_devclass, 0, 0); int ioapic_get_rid(u_int apic_id, uint16_t *ridp) { struct ioapic *io; uintptr_t rid; int error; mtx_lock_spin(&icu_lock); STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_apic_id == apic_id) break; } mtx_unlock_spin(&icu_lock); if (io == NULL || io->pci_dev == NULL) return (EINVAL); error = pci_get_id(io->pci_dev, PCI_ID_RID, &rid); if (error != 0) return (error); *ridp = rid; return (0); } /* * A new-bus driver to consume the memory resources associated with * the APICs in the system. On some systems ACPI or PnPBIOS system * resource devices may already claim these resources. To keep from * breaking those devices, we attach ourself to the nexus device after * legacy0 and acpi0 and ignore any allocation failures. */ static void apic_identify(driver_t *driver, device_t parent) { /* * Add at order 12. acpi0 is probed at order 10 and legacy0 * is probed at order 11. */ if (lapic_paddr != 0) BUS_ADD_CHILD(parent, 12, "apic", 0); } static int apic_probe(device_t dev) { device_set_desc(dev, "APIC resources"); device_quiet(dev); return (0); } static void apic_add_resource(device_t dev, int rid, vm_paddr_t base, size_t length) { int error; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, base, length); if (error) panic("apic_add_resource: resource %d failed set with %d", rid, error); bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_SHAREABLE); } static int apic_attach(device_t dev) { struct ioapic *io; int i; /* Reserve the local APIC. 
*/ apic_add_resource(dev, 0, lapic_paddr, LAPIC_MEM_REGION); i = 1; STAILQ_FOREACH(io, &ioapic_list, io_next) { apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION); i++; } return (0); } static device_method_t apic_methods[] = { /* Device interface */ DEVMETHOD(device_identify, apic_identify), DEVMETHOD(device_probe, apic_probe), DEVMETHOD(device_attach, apic_attach), { 0, 0 } }; DEFINE_CLASS_0(apic, apic_driver, apic_methods, 0); static devclass_t apic_devclass; DRIVER_MODULE(apic, nexus, apic_driver, apic_devclass, 0, 0); #include "opt_ddb.h" #ifdef DDB #include static const char * ioapic_delivery_mode(uint32_t mode) { switch (mode) { case IOART_DELFIXED: return ("fixed"); case IOART_DELLOPRI: return ("lowestpri"); case IOART_DELSMI: return ("SMI"); case IOART_DELRSV1: return ("rsrvd1"); case IOART_DELNMI: return ("NMI"); case IOART_DELINIT: return ("INIT"); case IOART_DELRSV2: return ("rsrvd2"); case IOART_DELEXINT: return ("ExtINT"); default: return (""); } } static u_int db_ioapic_read(volatile ioapic_t *apic, int reg) { apic->ioregsel = reg; return (apic->iowin); } static void db_show_ioapic_one(volatile ioapic_t *io_addr) { uint32_t r, lo, hi; int mre, i; r = db_ioapic_read(io_addr, IOAPIC_VER); mre = (r & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT; db_printf("Id 0x%08x Ver 0x%02x MRE %d\n", db_ioapic_read(io_addr, IOAPIC_ID), r & IOART_VER_VERSION, mre); for (i = 0; i < mre; i++) { lo = db_ioapic_read(io_addr, IOAPIC_REDTBL_LO(i)); hi = db_ioapic_read(io_addr, IOAPIC_REDTBL_HI(i)); db_printf(" pin %d Dest %s/%x %smasked Trig %s RemoteIRR %d " "Polarity %s Status %s DeliveryMode %s Vec %d\n", i, (lo & IOART_DESTMOD) == IOART_DESTLOG ? "log" : "phy", (hi & IOART_DEST) >> 24, (lo & IOART_INTMASK) == IOART_INTMSET ? "" : "not", (lo & IOART_TRGRMOD) == IOART_TRGRLVL ? "lvl" : "edge", (lo & IOART_REM_IRR) == IOART_REM_IRR ? 1 : 0, (lo & IOART_INTPOL) == IOART_INTALO ? "low" : "high", (lo & IOART_DELIVS) == IOART_DELIVS ? "pend" : "idle", ioapic_delivery_mode(lo & IOART_DELMOD), (lo & IOART_INTVEC)); } } DB_SHOW_COMMAND(ioapic, db_show_ioapic) { struct ioapic *ioapic; int idx, i; if (!have_addr) { db_printf("usage: show ioapic index\n"); return; } idx = (int)addr; i = 0; STAILQ_FOREACH(ioapic, &ioapic_list, io_next) { if (idx == i) { db_show_ioapic_one(ioapic->io_addr); break; } i++; } } DB_SHOW_ALL_COMMAND(ioapics, db_show_all_ioapics) { struct ioapic *ioapic; STAILQ_FOREACH(ioapic, &ioapic_list, io_next) db_show_ioapic_one(ioapic->io_addr); } #endif Index: head/sys/x86/x86/local_apic.c =================================================================== --- head/sys/x86/x86/local_apic.c (revision 344854) +++ head/sys/x86/x86/local_apic.c (revision 344855) @@ -1,2182 +1,2182 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * - * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. + * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* * I/O interrupts use non-negative IRQ values. These values are used * to mark unused IDT entries or IDT entries reserved for a non-I/O * interrupt. */ #define IRQ_FREE -1 #define IRQ_TIMER -2 #define IRQ_SYSCALL -3 #define IRQ_DTRACE_RET -4 #define IRQ_EVTCHN -5 enum lat_timer_mode { LAT_MODE_UNDEF = 0, LAT_MODE_PERIODIC = 1, LAT_MODE_ONESHOT = 2, LAT_MODE_DEADLINE = 3, }; /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[APIC_LVT_MAX + 1]; struct lvt la_elvts[APIC_ELVT_MAX + 1];; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; uint64_t la_timer_period; enum lat_timer_mode la_timer_mode; uint32_t lvt_timer_base; uint32_t lvt_timer_last; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static *lapics; /* Global defaults for local APIC LVT entries. */ static struct lvt lvts[APIC_LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; /* Global defaults for AMD local APIC ELVT entries. 
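 */

/*
 * [Editor's sketch -- not part of this change.] lvt_mode_impl(), further
 * on in this file, folds a struct lvt into the 32-bit LVT register image.
 * A simplified standalone model using the LVT bit positions from the
 * Intel SDM (vector 7:0, delivery mode 10:8, polarity 13, trigger 15,
 * mask 16); unlike the kernel, which stores the delivery mode
 * pre-shifted, this model shifts a small mode field.  The model_* names
 * are hypothetical.
 */
#include <assert.h>
#include <stdint.h>

struct model_lvt {
        unsigned edgetrigger:1;
        unsigned activehi:1;
        unsigned masked:1;
        unsigned mode:3;        /* delivery mode; 0 is "fixed" */
        unsigned vector:8;
};

static uint32_t
model_lvt_value(const struct model_lvt *lvt)
{
        uint32_t v = 0;

        if (!lvt->edgetrigger)
                v |= 1u << 15;                  /* level-triggered */
        if (!lvt->activehi)
                v |= 1u << 13;                  /* active-low polarity */
        if (lvt->masked)
                v |= 1u << 16;                  /* masked */
        v |= (uint32_t)lvt->mode << 8;          /* delivery mode */
        if (lvt->mode == 0)                     /* fixed mode uses a vector */
                v |= lvt->vector;
        return (v);
}

int
main(void)
{
        /* Masked, edge, active-high, fixed delivery, vector 0xd1. */
        struct model_lvt lvt = { 1, 1, 1, 0, 0xd1 };

        assert(model_lvt_value(&lvt) == ((1u << 16) | 0xd1));
        return (0);
}

/*
 * (End of editorial sketch; the ELVT defaults follow.)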
*/ static struct lvt elvts[APIC_ELVT_MAX + 1] = { { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static inthand_t *ioint_pti_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1_pti), /* 32 - 63 */ IDTVEC(apic_isr2_pti), /* 64 - 95 */ IDTVEC(apic_isr3_pti), /* 96 - 127 */ IDTVEC(apic_isr4_pti), /* 128 - 159 */ IDTVEC(apic_isr5_pti), /* 160 - 191 */ IDTVEC(apic_isr6_pti), /* 192 - 223 */ IDTVEC(apic_isr7_pti), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd); volatile char *lapic_map; vm_paddr_t lapic_paddr; int x2apic_mode; int lapic_eoi_suppression; static int lapic_timer_tsc_deadline; static u_long lapic_timer_divisor, count_freq; static struct eventtimer lapic_et; #ifdef SMP static uint64_t lapic_ipi_wait_mult; #endif unsigned int max_apic_id; SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, &lapic_eoi_suppression, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, &lapic_timer_tsc_deadline, 0, ""); static void lapic_calibrate_initcount(struct lapic *la); static void lapic_calibrate_deadline(struct lapic *la); static uint32_t lapic_read32(enum LAPIC_REGISTERS reg) { uint32_t res; if (x2apic_mode) { res = rdmsr32(MSR_APIC_000 + reg); } else { res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); } return (res); } static void lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { mfence(); lfence(); wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } static void lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } #ifdef SMP static uint64_t lapic_read_icr(void) { uint64_t v; uint32_t vhi, vlo; if (x2apic_mode) { v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); } else { vhi = lapic_read32(LAPIC_ICR_HI); vlo = lapic_read32(LAPIC_ICR_LO); v = ((uint64_t)vhi << 32) | vlo; } return (v); } static uint64_t lapic_read_icr_lo(void) { return (lapic_read32(LAPIC_ICR_LO)); } static void lapic_write_icr(uint32_t vhi, uint32_t vlo) { uint64_t v; if (x2apic_mode) { v = ((uint64_t)vhi << 32) | vlo; mfence(); wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); } else { lapic_write32(LAPIC_ICR_HI, vhi); lapic_write32(LAPIC_ICR_LO, vlo); } } #endif /* SMP */ static void native_lapic_enable_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } static bool native_lapic_is_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) == (APICBASE_X2APIC | APICBASE_ENABLED)); } static void lapic_enable(void); static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void 
lapic_timer_oneshot(struct lapic *); static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); static void lapic_timer_periodic(struct lapic *); static void lapic_timer_deadline(struct lapic *); static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); static u_int apic_idt_to_irq(u_int apic_id, u_int vector); static void lapic_set_tpr(u_int vector); struct pic lapic_pic = { .pic_resume = lapic_resume }; /* Forward declarations for apic_ops */ static void native_lapic_create(u_int apic_id, int boot_cpu); static void native_lapic_init(vm_paddr_t addr); static void native_lapic_xapic_mode(void); static void native_lapic_setup(int boot); static void native_lapic_dump(const char *str); static void native_lapic_disable(void); static void native_lapic_eoi(void); static int native_lapic_id(void); static int native_lapic_intr_pending(u_int vector); static u_int native_apic_cpuid(u_int apic_id); static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); static void native_apic_disable_vector(u_int apic_id, u_int vector); static void native_apic_enable_vector(u_int apic_id, u_int vector); static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); static int native_lapic_enable_pmc(void); static void native_lapic_disable_pmc(void); static void native_lapic_reenable_pmc(void); static void native_lapic_enable_cmc(void); static int native_lapic_enable_mca_elvt(void); static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode); static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); #ifdef SMP static void native_lapic_ipi_raw(register_t icrlo, u_int dest); static void native_lapic_ipi_vectored(u_int vector, int dest); static int native_lapic_ipi_wait(int delay); #endif /* SMP */ static int native_lapic_ipi_alloc(inthand_t *ipifunc); static void native_lapic_ipi_free(int vector); struct apic_ops apic_ops = { .create = native_lapic_create, .init = native_lapic_init, .xapic_mode = native_lapic_xapic_mode, .is_x2apic = native_lapic_is_x2apic, .setup = native_lapic_setup, .dump = native_lapic_dump, .disable = native_lapic_disable, .eoi = native_lapic_eoi, .id = native_lapic_id, .intr_pending = native_lapic_intr_pending, .set_logical_id = native_lapic_set_logical_id, .cpuid = native_apic_cpuid, .alloc_vector = native_apic_alloc_vector, .alloc_vectors = native_apic_alloc_vectors, .enable_vector = native_apic_enable_vector, .disable_vector = native_apic_disable_vector, .free_vector = native_apic_free_vector, .enable_pmc = native_lapic_enable_pmc, .disable_pmc = native_lapic_disable_pmc, .reenable_pmc = native_lapic_reenable_pmc, .enable_cmc = native_lapic_enable_cmc, .enable_mca_elvt = native_lapic_enable_mca_elvt, #ifdef SMP .ipi_raw = native_lapic_ipi_raw, .ipi_vectored = native_lapic_ipi_vectored, .ipi_wait = native_lapic_ipi_wait, #endif .ipi_alloc = native_lapic_ipi_alloc, .ipi_free = native_lapic_ipi_free, .set_lvt_mask = 
native_lapic_set_lvt_mask, .set_lvt_mode = native_lapic_set_lvt_mode, .set_lvt_polarity = native_lapic_set_lvt_polarity, .set_lvt_triggermode = native_lapic_set_lvt_triggermode, }; static uint32_t lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) { value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger && bootverbose) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value &= ~APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= APIC_LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; return (lvt_mode_impl(la, lvt, pin, value)); } static uint32_t elvt_mode(struct lapic *la, u_int idx, uint32_t value) { struct lvt *elvt; KASSERT(idx <= APIC_ELVT_MAX, ("%s: idx %u out of range", __func__, idx)); elvt = &la->la_elvts[idx]; KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); KASSERT(elvt->lvt_edgetrigger, ("%s: ELVT%u is not edge triggered", __func__, idx)); KASSERT(elvt->lvt_activehi, ("%s: ELVT%u is not active high", __func__, idx)); return (lvt_mode_impl(la, elvt, idx, value)); } /* * Map the local APIC and setup necessary interrupt vectors. */ static void native_lapic_init(vm_paddr_t addr) { #ifdef SMP uint64_t r, r1, r2, rx; #endif uint32_t ver; u_int regs[4]; int i, arat; /* * Enable x2APIC mode if possible. Map the local APIC * registers page. * * Keep the LAPIC registers page mapped uncached for x2APIC * mode too, to have direct map page attribute set to * uncached. This is needed to work around CPU errata present * on all Intel processors. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic_map = pmap_mapdev(addr, PAGE_SIZE); if (x2apic_mode) { native_lapic_enable_x2apic(); lapic_map = NULL; } /* Setup the spurious interrupt handler. */ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. */ setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint), SDT_APIC, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. 
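 */

/*
 * [Editor's sketch -- not part of this change.] The ARAT probe just below
 * can be reproduced in userland with the compiler's <cpuid.h> helper:
 * CPUID leaf 0x06, EAX bit 2 reports an always-running APIC timer.
 * GCC/Clang on x86 only; the printed wording is this editor's.
 */
#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
        unsigned eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the leaf is not supported. */
        if (__get_cpuid(0x06, &eax, &ebx, &ecx, &edx) == 0) {
                printf("CPUID leaf 0x06 not supported\n");
                return (1);
        }
        printf("ARAT (APIC timer runs in deep C-states): %s\n",
            (eax & (1u << 2)) ? "yes" : "no");
        return (0);
}

/*
 * (End of editorial sketch.)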
*/ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } else if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) >= 0x12) { arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality = 100; } if ((cpu_feature & CPUID_TSC) != 0 && (cpu_feature2 & CPUID2_TSCDLT) != 0 && tsc_is_invariant && tsc_freq != 0) { lapic_timer_tsc_deadline = 1; TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", &lapic_timer_tsc_deadline); } lapic_et.et_frequency = 0; /* We don't know the frequency yet, so guess for now. */ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } /* * Set lapic_eoi_suppression after lapic_enable(), so as not to * enable suppression in the hardware prematurely. Note that * by default we enable suppression even when the system only has * one I/O APIC, since the EOI is otherwise broadcast to all APIC * agents, including CPUs. * * It seems that at least some KVM versions report the * EOI_SUPPRESSION bit, but auto-EOI does not work. */ ver = lapic_read32(LAPIC_VERSION); if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { lapic_eoi_suppression = 1; if (vm_guest == VM_GUEST_KVM) { if (bootverbose) printf( "KVM -- disabling lapic eoi suppression\n"); lapic_eoi_suppression = 0; } TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", &lapic_eoi_suppression); } #ifdef SMP #define LOOPS 100000 /* * Calibrate the busy loop waiting for IPI ack in xAPIC mode. * lapic_ipi_wait_mult contains the number of iterations which * approximately delay execution for 1 microsecond (the * argument to native_lapic_ipi_wait() is in microseconds). * * We assume that the TSC is present and already measured. * Possible TSC frequency jumps are irrelevant to the * calibration loop below: the CPU clock management code is * not yet started, and we do not enter sleep states. */ KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, ("TSC not initialized")); if (!x2apic_mode) { r = rdtsc(); for (rx = 0; rx < LOOPS; rx++) { (void)lapic_read_icr_lo(); ia32_pause(); } r = rdtsc() - r; r1 = tsc_freq * LOOPS; r2 = r * 1000000; lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1; if (bootverbose) { printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, (uintmax_t)r, (uintmax_t)tsc_freq); } } #undef LOOPS #endif /* SMP */ } /* * Create a local APIC instance. */ static void native_lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > max_apic_id) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0.
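/*
 * Illustrative sketch, not from the FreeBSD sources: the calibration
 * above times LOOPS dummy ICR reads with the TSC and derives how many
 * loop iterations fit in one microsecond:
 *
 *	iterations/us = (tsc_freq * LOOPS) / (r * 1000000)
 *
 * which is the r1 / r2 computation above.  Stand-alone model with
 * assumed sample numbers (2 GHz TSC, 100000 iterations measured at
 * 40000000 cycles, i.e. 20 ms, giving 5 iterations per microsecond):
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t tsc_freq = 2000000000;		/* assumed 2 GHz TSC */
	uint64_t loops = 100000;
	uint64_t r = 40000000;			/* assumed measured cycles */
	uint64_t r1 = tsc_freq * loops;
	uint64_t r2 = r * 1000000;
	uint64_t mult = r1 >= r2 ? r1 / r2 : 1;	/* clamp to at least 1 */

	printf("ipi_wait() us multiplier: %ju\n", (uintmax_t)mult); /* 5 */
	return (0);
}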
*/ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= APIC_LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_ELVT_MAX; i++) { lapics[apic_id].la_elvts[i] = elvts[i]; lapics[apic_id].la_elvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif #ifdef XENHVM lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; #endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } static inline uint32_t amd_read_ext_features(void) { uint32_t version; if (cpu_vendor_id != CPU_VENDOR_AMD) return (0); version = lapic_read32(LAPIC_VERSION); if ((version & APIC_VER_AMD_EXT_SPACE) != 0) return (lapic_read32(LAPIC_EXT_FEATURES)); else return (0); } static inline uint32_t amd_read_elvt_count(void) { uint32_t extf; uint32_t count; extf = amd_read_ext_features(); count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; count = min(count, APIC_ELVT_MAX + 1); return (count); } /* * Dump contents of local APIC registers */ static void native_lapic_dump(const char* str) { uint32_t version; uint32_t maxlvt; uint32_t extf; int elvt_count; int i; version = lapic_read32(LAPIC_VERSION); maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", lapic_read32(LAPIC_ID), version, lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); if ((cpu_feature2 & CPUID2_X2APIC) != 0) printf(" x2APIC: %d", x2apic_mode); printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), lapic_read32(LAPIC_LVT_ERROR)); if (maxlvt >= APIC_LVT_PMC) printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); printf("\n"); if (maxlvt >= APIC_LVT_CMCI) printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); extf = amd_read_ext_features(); if (extf != 0) { printf(" AMD ext features: 0x%08x\n", extf); elvt_count = amd_read_elvt_count(); for (i = 0; i < elvt_count; i++) printf(" AMD elvt%d: 0x%08x\n", i, lapic_read32(LAPIC_EXT_LVT0 + i)); } } static void native_lapic_xapic_mode(void) { register_t saveintr; saveintr = intr_disable(); if (x2apic_mode) native_lapic_enable_x2apic(); intr_restore(saveintr); } static void native_lapic_setup(int boot) { struct lapic *la; uint32_t version; uint32_t maxlvt; register_t saveintr; int elvt_count; int i; saveintr = intr_disable(); la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); version = lapic_read32(LAPIC_VERSION); maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, lapic_read32(LAPIC_LVT_LINT0))); lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, lapic_read32(LAPIC_LVT_LINT1))); /* Program the PMC LVT entry if present. 
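/*
 * Illustrative sketch, not from the FreeBSD sources:
 * amd_read_elvt_count() above extracts a small counter field from the
 * extended-features register with a mask and shift, then clamps it to
 * what the driver supports.  The DEMO_* layout below is invented; the
 * real APIC_EXTF_* values live in a header that is not part of this
 * diff.
 */
#include <stdint.h>
#include <stdio.h>

#define	DEMO_ELVT_MASK	0x00ff0000u	/* hypothetical field placement */
#define	DEMO_ELVT_SHIFT	16
#define	DEMO_ELVT_MAX	3

int
main(void)
{
	uint32_t extf = 0x00040001u;	/* assumed register contents */
	uint32_t count = (extf & DEMO_ELVT_MASK) >> DEMO_ELVT_SHIFT;

	if (count > DEMO_ELVT_MAX + 1)	/* clamp, as min() does above */
		count = DEMO_ELVT_MAX + 1;
	printf("ELVT count: %u\n", count);	/* 4 */
	return (0);
}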
*/ if (maxlvt >= APIC_LVT_PMC) { lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, LAPIC_LVT_PCINT)); } /* Program timer LVT. */ la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); la->lvt_timer_last = la->lvt_timer_base; lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); /* Calibrate the timer parameters using BSP. */ if (boot && IS_BSP()) { lapic_calibrate_initcount(la); if (lapic_timer_tsc_deadline) lapic_calibrate_deadline(la); } /* Setup the timer if configured. */ if (la->la_timer_mode != LAT_MODE_UNDEF) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); switch (la->la_timer_mode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_periodic(la); break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la); break; case LAT_MODE_DEADLINE: lapic_timer_deadline(la); break; default: panic("corrupted la_timer_mode %p %d", la, la->la_timer_mode); } } /* Program error LVT and clear any existing errors. */ lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, lapic_read32(LAPIC_LVT_ERROR))); lapic_write32(LAPIC_ESR, 0); /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= APIC_LVT_CMCI) { lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, lapic_read32(LAPIC_LVT_CMCI))); } elvt_count = amd_read_elvt_count(); for (i = 0; i < elvt_count; i++) { if (la->la_elvts[i].lvt_active) lapic_write32(LAPIC_EXT_LVT0 + i, elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); } intr_restore(saveintr); } static void native_lapic_intrcnt(void *dummy __unused) { struct pcpu *pc; struct lapic *la; char buf[MAXCOMLEN + 1]; /* If there are no APICs, skip this function. */ if (lapics == NULL) return; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { la = &lapics[pc->pc_apic_id]; if (!la->la_present) continue; snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); intrcnt_add(buf, &la->la_timer_count); } } SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt, NULL); static void native_lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); } #endif static int native_lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return (0); lvts[APIC_LVT_PMC].lvt_masked = 0; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #else #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); #endif return (1); #else return (0); #endif } static void native_lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return; /* Fail if the PMC LVT is not present. 
*/ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return; lvts[APIC_LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static void lapic_calibrate_initcount(struct lapic *la) { u_long value; /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. */ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) { printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); } count_freq = value; } static void lapic_calibrate_deadline(struct lapic *la __unused) { if (bootverbose) { printf("lapic: deadline tsc mode, Frequency %ju Hz\n", (uintmax_t)tsc_freq); } } static void lapic_change_mode(struct eventtimer *et, struct lapic *la, enum lat_timer_mode newmode) { if (la->la_timer_mode == newmode) return; switch (newmode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; case LAT_MODE_DEADLINE: et->et_frequency = tsc_freq; break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; default: panic("lapic_change_mode %d", newmode); } la->la_timer_mode = newmode; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; if (period != 0) { lapic_change_mode(et, la, LAT_MODE_PERIODIC); la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la); } else if (lapic_timer_tsc_deadline) { lapic_change_mode(et, la, LAT_MODE_DEADLINE); la->la_timer_period = (et->et_frequency * first) >> 32; lapic_timer_deadline(la); } else { lapic_change_mode(et, la, LAT_MODE_ONESHOT); la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; lapic_timer_stop(la); la->la_timer_mode = LAT_MODE_UNDEF; return (0); } static void native_lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~APIC_SVR_SWEN; lapic_write32(LAPIC_SVR, value); } static void lapic_enable(void) { uint32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; if (lapic_eoi_suppression) value |= APIC_SVR_EOI_SUPPRESSION; lapic_write32(LAPIC_SVR, value); } /* Reset the local APIC on the BSP during resume. 
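/*
 * Illustrative sketch, not from the FreeBSD sources:
 * lapic_calibrate_initcount() above arms a one-shot countdown at the
 * maximum initial count with the interrupt masked, waits one second,
 * and reads back how far the counter got.  If it ran all the way to
 * zero the measurement saturated, so the divisor is doubled and the
 * measurement retried.  Stand-alone model with an assumed input clock:
 */
#include <stdint.h>
#include <stdio.h>

#define	DEMO_MAX_COUNT	0xffffffffu
#define	DEMO_CLOCK_HZ	400000000u	/* assumed timer input clock */

/* Counts left in a modelled current-count register after one second. */
static uint32_t
demo_ccr_after_1s(uint32_t divisor)
{
	uint64_t ticks = (uint64_t)DEMO_CLOCK_HZ / divisor;

	return (ticks >= DEMO_MAX_COUNT ? 0 :
	    DEMO_MAX_COUNT - (uint32_t)ticks);
}

int
main(void)
{
	uint32_t divisor, value = 0;

	for (divisor = 2; divisor <= 128; divisor <<= 1) {
		value = DEMO_MAX_COUNT - demo_ccr_after_1s(divisor);
		if (value != DEMO_MAX_COUNT)	/* did not saturate */
			break;
	}
	printf("divisor %u, %u ticks/s\n", divisor, value);
	return (0);
}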
*/ static void lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); } static int native_lapic_id(void) { uint32_t v; KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); v = lapic_read32(LAPIC_ID); if (!x2apic_mode) v >>= APIC_ID_SHIFT; return (v); } static int native_lapic_intr_pending(u_int vector) { uint32_t irr; /* * The IRR registers are an array of registers each of which * only describes 32 interrupts in the low 32 bits. Thus, we * divide the vector by 32 to get the register index. * Finally, we modulus the vector by 32 to determine the * individual bit to test. */ irr = lapic_read32(LAPIC_IRR0 + vector / 32); return (irr & 1 << (vector % 32)); } static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } static int native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); return (0); } static int native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } static int native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? 
"high" : "low"); return (0); } static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ static void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic_write32(LAPIC_TPR, vector); #else uint32_t tpr; tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; tpr |= vector; lapic_write32(LAPIC_TPR, tpr); #endif } static void native_lapic_eoi(void) { lapic_write32_nofence(LAPIC_EOI, 0); } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. */ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. 
*/ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), ("lapic: invalid divisor %u", divisor)); lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); } static void lapic_timer_oneshot(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_ONE_SHOT; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) { uint32_t value; value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_periodic(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_PERIODIC; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_deadline(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_TSCDLT; if (value != la->lvt_timer_last) { la->lvt_timer_last = value; lapic_write32_nofence(LAPIC_LVT_TIMER, value); if (!x2apic_mode) mfence(); } wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); } static void lapic_timer_stop(struct lapic *la) { uint32_t value; if (la->la_timer_mode == LAT_MODE_DEADLINE) { wrmsr(MSR_TSC_DEADLINE, 0); mfence(); } else { value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); } } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. 
*/ static void native_lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (!x2apic_mode && lapic_map == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } static int native_lapic_enable_mca_elvt(void) { u_int apic_id; uint32_t value; int elvt_count; #ifdef DEV_ATPIC if (lapic_map == NULL) return (-1); #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); elvt_count = amd_read_elvt_count(); if (elvt_count <= APIC_ELVT_MCA) return (-1); value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); if ((value & APIC_LVT_M) == 0) { if (bootverbose) printf("AMD MCE Thresholding Extended LVT is already active\n"); return (APIC_ELVT_MCA); } lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; if (bootverbose) printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id); return (APIC_ELVT_MCA); } void lapic_handle_error(void) { uint32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic_write32(LAPIC_ESR, 0); esr = lapic_read32(LAPIC_ESR); printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } static u_int native_apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ static u_int native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. */ static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. 
*/ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ static void native_apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } static void native_apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ static u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. 
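/*
 * Illustrative sketch, not from the FreeBSD sources:
 * native_apic_alloc_vectors() above scans for 'count' consecutive free
 * slots whose first slot is aligned to 'align' (both powers of two),
 * abandoning a run as soon as a used slot is hit.  Stand-alone model
 * of the search:
 */
#include <stdio.h>

#define	DEMO_NSLOTS	32

static int
demo_alloc_run(const int *used, unsigned int count, unsigned int align)
{
	unsigned int first = 0, run = 0, slot;

	for (slot = 0; slot < DEMO_NSLOTS; slot++) {
		if (used[slot]) {		/* in use, end the run */
			run = 0;
			continue;
		}
		if (run == 0) {			/* only start when aligned */
			if ((slot & (align - 1)) != 0)
				continue;
			first = slot;
		}
		if (++run == count)
			return ((int)first);
	}
	return (-1);
}

int
main(void)
{
	int used[DEMO_NSLOTS] = { 0 };

	used[1] = used[9] = 1;	/* assumed existing allocations */
	/* Blocks at 0 and 8 are broken; the run at 16 is the first fit. */
	printf("run of 4 aligned to 8 starts at %d\n",
	    demo_alloc_run(used, 4, 8));	/* 16 */
	return (0);
}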
*/ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= max_apic_id; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == IRQ_FREE || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; #endif #ifdef XENHVM if (irq == IRQ_EVTCHN) continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < num_io_irqs) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic_read32(LAPIC_VERSION); db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic_read32(LAPIC_SVR); db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); #define dump_field(prefix, regn, index) \ dump_mask(__XSTRING(prefix ## index), \ lapic_read32(LAPIC_ ## regn ## index), \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, ISR, 0); dump_field(isr, ISR, 1); dump_field(isr, ISR, 2); dump_field(isr, ISR, 3); dump_field(isr, ISR, 4); dump_field(isr, ISR, 5); dump_field(isr, ISR, 6); dump_field(isr, ISR, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, TMR, 0); dump_field(tmr, TMR, 1); dump_field(tmr, TMR, 2); dump_field(tmr, TMR, 3); dump_field(tmr, TMR, 4); dump_field(tmr, TMR, 5); dump_field(tmr, TMR, 6); dump_field(tmr, TMR, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, IRR, 0); dump_field(irr, IRR, 1); dump_field(irr, IRR, 2); dump_field(irr, IRR, 3); dump_field(irr, IRR, 4); dump_field(irr, IRR, 5); dump_field(irr, IRR, 6); dump_field(irr, IRR, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. */ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. 
*/ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifdef I686_CPU /* * To work around an erratum, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on for such CPUs now. */ ppro_reenable_apic(); #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, M_WAITOK | M_ZERO); /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ intr_register_pic(&lapic_pic); retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); /* * Finish setting up the local APIC on the BSP once we know * how to properly program the LINT pins. In particular, this * enables the EOI suppression mode, if the LAPIC supports it and * the user did not disable the mode. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ init_ops.msi_init(); #ifdef XENHVM xen_intr_alloc_irqs(); #endif } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. */ /* * Wait 'delay' microseconds for the IPI to be sent. If delay is -1, we * wait forever. */ static int native_lapic_ipi_wait(int delay) { uint64_t rx; /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ if (x2apic_mode) return (1); for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } static void native_lapic_ipi_raw(register_t icrlo, u_int dest) { uint64_t icr; uint32_t vhi, vlo; register_t saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(x2apic_mode || lapic_map != NULL, ("%s called too early", __func__)); KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used.
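/*
 * Illustrative sketch, not from the FreeBSD sources: apic_probe()
 * returns a positive value for "not present" and zero or a negative
 * value for "present", with the value closest to zero winning in
 * apic_init() above.  The MP Table enumerator later in this changeset
 * returns -100, which leaves room for a preferred enumerator to
 * outrank it (the -50 below is only an assumed figure).
 */
#include <stdio.h>

struct demo_enum {
	const char	*name;
	int		probe;	/* <= 0 means present */
};

int
main(void)
{
	struct demo_enum list[] = {
		{ "MPTable", -100 },
		{ "ACPI", -50 },	/* assumed ranking */
		{ "Missing", 1 },	/* probe failed, skipped */
	};
	struct demo_enum *best = NULL;
	int best_q = 0, i;

	for (i = 0; i < 3; i++) {
		if (list[i].probe > 0)
			continue;
		if (best == NULL || best_q < list[i].probe) {
			best = &list[i];
			best_q = list[i].probe;
		}
	}
	printf("using %s\n", best != NULL ? best->name : "none"); /* ACPI */
	return (0);
}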
*/ if (!x2apic_mode) { saveintr = intr_disable(); icr = lapic_read_icr(); } if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { if (x2apic_mode) { vhi = dest; } else { vhi = icr >> 32; vhi &= ~APIC_ID_MASK; vhi |= dest << APIC_ID_SHIFT; } } else { vhi = 0; } /* Program the contents of the IPI and dispatch it. */ if (x2apic_mode) { vlo = icrlo; } else { vlo = icr; vlo &= APIC_ICRLO_RESV_MASK; vlo |= icrlo; } lapic_write_icr(vhi, vlo); if (!x2apic_mode) intr_restore(saveintr); } #define BEFORE_SPIN 50000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 50 #endif static void native_lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; /* * NMI IPIs are just fake vectors used to send an NMI. Use special rules * regarding NMIs if passed, otherwise specify the vector. */ if (vector >= IPI_NMI_FIRST) icrlo |= APIC_DELMODE_NMI; else icrlo |= vector | APIC_DELMODE_FIXED; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until the message is sent, without a timeout. */ while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ /* * Since the IDT is shared by all CPUs the IPI slot update needs to be globally * visible. * * Consider the case where an IPI is generated immediately after allocation: * vector = lapic_ipi_alloc(ipifunc); * ipi_selected(other_cpus, vector); * * In xAPIC mode a write to ICR_LO has serializing semantics because the * APIC page is mapped as an uncached region. In x2APIC mode there is an * explicit 'mfence' before the ICR MSR is written. Therefore in both cases * the IDT slot update is globally visible before the IPI is delivered.
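/*
 * Illustrative sketch, not from the FreeBSD sources: the
 * lapic_write_icr() helper used above (defined earlier in this file,
 * outside this hunk) hides an ordering detail worth spelling out.  In
 * xAPIC mode the destination half (ICR_HI) must be written before
 * ICR_LO, because the ICR_LO write is what launches the IPI; in x2APIC
 * mode both halves go out in a single 64-bit MSR write.  The accessors
 * below are stand-ins that just print:
 */
#include <stdint.h>
#include <stdio.h>

static int x2apic;

static void
demo_write32(const char *reg, uint32_t v)	/* stands in for MMIO */
{
	printf("write32 %s = 0x%08x\n", reg, v);
}

static void
demo_wrmsr(const char *msr, uint64_t v)		/* stands in for wrmsr */
{
	printf("wrmsr   %s = 0x%016jx\n", msr, (uintmax_t)v);
}

static void
demo_write_icr(uint32_t vhi, uint32_t vlo)
{
	if (x2apic) {
		/* One atomic write, no ordering hazard. */
		demo_wrmsr("ICR", (uint64_t)vhi << 32 | vlo);
	} else {
		/* HI first: the LO write fires the interrupt. */
		demo_write32("ICR_HI", vhi);
		demo_write32("ICR_LO", vlo);
	}
}

int
main(void)
{
	demo_write_icr(0x01000000, 0x000040fe);	/* assumed dest/vector */
	x2apic = 1;
	demo_write_icr(0x00000001, 0x000040fe);
	return (0);
}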
*/ static int native_lapic_ipi_alloc(inthand_t *ipifunc) { struct gate_descriptor *ip; long func; int idx, vector; KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti), ("invalid ipifunc %p", ipifunc)); vector = -1; mtx_lock_spin(&icu_lock); for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { ip = &idt[idx]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) || (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) { vector = idx; setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); break; } } mtx_unlock_spin(&icu_lock); return (vector); } static void native_lapic_ipi_free(int vector) { struct gate_descriptor *ip; long func; KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, ("%s: invalid vector %d", __func__, vector)); mtx_lock_spin(&icu_lock); ip = &idt[vector]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; KASSERT(func != (uintptr_t)&IDTVEC(rsvd) && func != (uintptr_t)&IDTVEC(rsvd_pti), ("invalid idtfunc %#lx", func)); setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, SEL_KPL, GSEL_APIC); mtx_unlock_spin(&icu_lock); } Index: head/sys/x86/x86/mptable.c =================================================================== --- head/sys/x86/x86/mptable.c (revision 344854) +++ head/sys/x86/x86/mptable.c (revision 344855) @@ -1,1262 +1,1262 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. + * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_mptable_force_htt.h" #include #include #include #include #include #include #include #ifdef NEW_PCIB #include #endif #include #include #include #include #ifdef NEW_PCIB #include #endif #include #include #include #include #include #include #include #ifdef NEW_PCIB #include #endif #include /* string defined by the Intel MP Spec as identifying the MP table */ #define MP_SIG 0x5f504d5f /* _MP_ */ #ifdef __amd64__ #define MAX_LAPIC_ID 63 /* Max local APIC ID for HTT fixup */ #else #define MAX_LAPIC_ID 31 /* Max local APIC ID for HTT fixup */ #endif #define BIOS_BASE (0xf0000) #define BIOS_SIZE (0x10000) #define BIOS_COUNT (BIOS_SIZE/4) typedef void mptable_entry_handler(u_char *entry, void *arg); typedef void mptable_extended_entry_handler(ext_entry_ptr entry, void *arg); /* descriptions of MP table entries */ typedef struct BASETABLE_ENTRY { uint8_t type; uint8_t length; uint8_t name[16]; } basetable_entry; static basetable_entry basetable_entry_types[] = { {0, 20, "Processor"}, {1, 8, "Bus"}, {2, 8, "I/O APIC"}, {3, 8, "I/O INT"}, {4, 8, "Local INT"} }; typedef struct BUSDATA { u_char bus_id; enum busTypes bus_type; } bus_datum; typedef struct INTDATA { u_char int_type; u_short int_flags; u_char src_bus_id; u_char src_bus_irq; u_char dst_apic_id; u_char dst_apic_int; u_char int_vector; } io_int, local_int; typedef struct BUSTYPENAME { u_char type; char name[7]; } bus_type_name; /* From MP spec v1.4, table 4-8. */ static bus_type_name bus_type_table[] = { {UNKNOWN_BUSTYPE, "CBUS "}, {UNKNOWN_BUSTYPE, "CBUSII"}, {EISA, "EISA "}, {UNKNOWN_BUSTYPE, "FUTURE"}, {UNKNOWN_BUSTYPE, "INTERN"}, {ISA, "ISA "}, {UNKNOWN_BUSTYPE, "MBI "}, {UNKNOWN_BUSTYPE, "MBII "}, {MCA, "MCA "}, {UNKNOWN_BUSTYPE, "MPI "}, {UNKNOWN_BUSTYPE, "MPSA "}, {UNKNOWN_BUSTYPE, "NUBUS "}, {PCI, "PCI "}, {UNKNOWN_BUSTYPE, "PCMCIA"}, {UNKNOWN_BUSTYPE, "TC "}, {UNKNOWN_BUSTYPE, "VL "}, {UNKNOWN_BUSTYPE, "VME "}, {UNKNOWN_BUSTYPE, "XPRESS"} }; /* From MP spec v1.4, table 5-1. */ static int default_data[7][5] = { /* nbus, id0, type0, id1, type1 */ {1, 0, ISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, MCA, 255, NOBUS}, {2, 0, ISA, 1, PCI}, {2, 0, EISA, 1, PCI}, {2, 0, MCA, 1, PCI} }; struct pci_probe_table_args { u_char bus; u_char found; }; struct pci_route_interrupt_args { u_char bus; /* Source bus. */ u_char irq; /* Source slot:pin. */ int vector; /* Return value. 
*/ }; static mpfps_t mpfps; static mpcth_t mpct; static ext_entry_ptr mpet; static void *ioapics[IOAPIC_MAX_ID + 1]; static bus_datum *busses; static int mptable_nioapics, mptable_nbusses, mptable_maxbusid; static int pci0 = -1; static MALLOC_DEFINE(M_MPTABLE, "mptable", "MP Table Items"); static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq); static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq); static enum intr_polarity intentry_polarity(int_entry_ptr intr); static enum intr_trigger intentry_trigger(int_entry_ptr intr); static int lookup_bus_type(char *name); static void mptable_count_items(void); static void mptable_count_items_handler(u_char *entry, void *arg); #ifdef MPTABLE_FORCE_HTT static void mptable_hyperthread_fixup(u_int id_mask); #endif static void mptable_parse_apics_and_busses(void); static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg); static void mptable_parse_default_config_ints(void); static void mptable_parse_ints(void); static void mptable_parse_ints_handler(u_char *entry, void *arg); static void mptable_parse_io_int(int_entry_ptr intr); static void mptable_parse_local_int(int_entry_ptr intr); static void mptable_pci_probe_table_handler(u_char *entry, void *arg); static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg); static void mptable_pci_setup(void); static int mptable_probe(void); static int mptable_probe_cpus(void); static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused); static void mptable_setup_cpus_handler(u_char *entry, void *arg __unused); static void mptable_register(void *dummy); static int mptable_setup_local(void); static int mptable_setup_io(void); #ifdef NEW_PCIB static void mptable_walk_extended_table( mptable_extended_entry_handler *handler, void *arg); #endif static void mptable_walk_table(mptable_entry_handler *handler, void *arg); static int search_for_sig(u_int32_t target, int count); static struct apic_enumerator mptable_enumerator = { .apic_name = "MPTable", .apic_probe = mptable_probe, .apic_probe_cpus = mptable_probe_cpus, .apic_setup_local = mptable_setup_local, .apic_setup_io = mptable_setup_io }; /* * look for the MP spec signature */ static int search_for_sig(u_int32_t target, int count) { int x; u_int32_t *addr; addr = (u_int32_t *)BIOS_PADDRTOVADDR(target); for (x = 0; x < count; x += 4) if (addr[x] == MP_SIG) /* make array index a byte index */ return (target + (x * sizeof(u_int32_t))); return (-1); } static int lookup_bus_type(char *name) { int x; for (x = 0; x < MAX_BUSTYPE; ++x) if (strncmp(bus_type_table[x].name, name, 6) == 0) return (bus_type_table[x].type); return (UNKNOWN_BUSTYPE); } /* * Look for an Intel MP spec table (ie, SMP capable hardware). */ static int mptable_probe(void) { int x; u_long segment; u_int32_t target; /* see if EBDA exists */ if ((segment = *(u_short *)BIOS_PADDRTOVADDR(0x40e)) != 0) { /* search first 1K of EBDA */ target = (u_int32_t) (segment << 4); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } else { /* last 1K of base memory, effective 'top of base' passed in */ target = (u_int32_t) ((basemem * 1024) - 0x400); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } /* search the BIOS */ target = (u_int32_t) BIOS_BASE; if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) goto found; /* nothing found */ return (ENXIO); found: mpfps = (mpfps_t)BIOS_PADDRTOVADDR(x); /* Map in the configuration table if it exists. 
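/*
 * Illustrative sketch, not from the FreeBSD sources: search_for_sig()
 * above scans physical memory for MP_SIG, which is simply the four
 * ASCII bytes "_MP_" read as a little-endian 32-bit integer, and
 * mptable_probe() turns the real-mode EBDA segment stored at physical
 * 0x40E into a linear address by shifting it left four bits.
 * Stand-alone check of both computations:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char sig[4] = { '_', 'M', 'P', '_' };
	unsigned short segment = 0x9fc0;	/* assumed word at 0x40e */
	uint32_t v;

	memcpy(&v, sig, sizeof(v));		/* reinterpret the bytes */
	/* Prints 0x5f504d5f on little-endian x86, matching MP_SIG. */
	printf("signature 0x%08x\n", v);
	/* Segment 0x9fc0 maps to linear 0x9fc00, just below 640K. */
	printf("EBDA at 0x%05x\n", (unsigned int)segment << 4);
	return (0);
}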
*/ if (mpfps->config_type != 0) { if (bootverbose) printf( "MP Table version 1.%d found using Default Configuration %d\n", mpfps->spec_rev, mpfps->config_type); if (mpfps->config_type != 5 && mpfps->config_type != 6) { printf( "MP Table Default Configuration %d is unsupported\n", mpfps->config_type); return (ENXIO); } mpct = NULL; } else { if ((uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map MP Configuration Table\n", __func__); return (ENXIO); } mpct = (mpcth_t)BIOS_PADDRTOVADDR((uintptr_t)mpfps->pap); if (mpct->base_table_length + (uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map end of MP Config Table\n", __func__); return (ENXIO); } if (mpct->extended_table_length != 0 && mpct->extended_table_length + mpct->base_table_length + (uintptr_t)mpfps->pap < 1024 * 1024) mpet = (ext_entry_ptr)((char *)mpct + mpct->base_table_length); if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' || mpct->signature[2] != 'M' || mpct->signature[3] != 'P') { printf("%s: MP Config Table has bad signature: %c%c%c%c\n", __func__, mpct->signature[0], mpct->signature[1], mpct->signature[2], mpct->signature[3]); return (ENXIO); } if (bootverbose) printf( "MP Configuration Table version 1.%d found at %p\n", mpct->spec_rev, mpct); } return (-100); } /* * Run through the MP table enumerating CPUs. */ static int mptable_probe_cpus(void) { u_int cpu_mask; /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { #ifdef SMP mp_ncpus = 2; mp_maxid = 1; #endif max_apic_id = 1; } else { mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask); } return (0); } /* * Initialize the local APIC on the BSP. */ static int mptable_setup_local(void) { vm_paddr_t addr; u_int cpu_mask; /* Is this a pre-defined config? */ printf("MPTable: <"); if (mpfps->config_type != 0) { lapic_create(0, 1); lapic_create(1, 0); addr = DEFAULT_APIC_BASE; printf("Default Configuration %d", mpfps->config_type); } else { cpu_mask = 0; mptable_walk_table(mptable_setup_cpus_handler, &cpu_mask); #ifdef MPTABLE_FORCE_HTT mptable_hyperthread_fixup(cpu_mask); #endif addr = mpct->apic_address; printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id, (int)sizeof(mpct->product_id), mpct->product_id); } printf(">\n"); lapic_init(addr); return (0); } /* * Run through the MP table enumerating I/O APICs. */ static int mptable_setup_io(void) { int i; u_char byte; /* First, we count individual items and allocate arrays. */ mptable_count_items(); busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE, M_WAITOK); for (i = 0; i <= mptable_maxbusid; i++) busses[i].bus_type = NOBUS; /* Second, we run through adding I/O APIC's and buses. */ mptable_parse_apics_and_busses(); /* Third, we run through the table tweaking interrupt sources. */ mptable_parse_ints(); /* Fourth, we register all the I/O APIC's. */ for (i = 0; i <= IOAPIC_MAX_ID; i++) if (ioapics[i] != NULL) ioapic_register(ioapics[i]); /* Fifth, we setup data structures to handle PCI interrupt routing. */ mptable_pci_setup(); /* Finally, we throw the switch to enable the I/O APIC's. 
*/ if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) { outb(0x22, 0x70); /* select IMCR */ byte = inb(0x23); /* current contents */ byte |= 0x01; /* mask external INTR */ outb(0x23, byte); /* disconnect 8259s/NMI */ } return (0); } static void mptable_register(void *dummy __unused) { apic_register_enumerator(&mptable_enumerator); } SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register, NULL); /* * Call the handler routine for each entry in the MP config base table. */ static void mptable_walk_table(mptable_entry_handler *handler, void *arg) { u_int i; u_char *entry; entry = (u_char *)(mpct + 1); for (i = 0; i < mpct->entry_count; i++) { switch (*entry) { case MPCT_ENTRY_PROCESSOR: case MPCT_ENTRY_IOAPIC: case MPCT_ENTRY_BUS: case MPCT_ENTRY_INT: case MPCT_ENTRY_LOCAL_INT: break; default: panic("%s: Unknown MP Config Entry %d\n", __func__, (int)*entry); } handler(entry, arg); entry += basetable_entry_types[*entry].length; } } #ifdef NEW_PCIB /* * Call the handler routine for each entry in the MP config extended * table. */ static void mptable_walk_extended_table(mptable_extended_entry_handler *handler, void *arg) { ext_entry_ptr end, entry; if (mpet == NULL) return; entry = mpet; end = (ext_entry_ptr)((char *)mpet + mpct->extended_table_length); while (entry < end) { handler(entry, arg); entry = (ext_entry_ptr)((char *)entry + entry->length); } } #endif static void mptable_probe_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN && proc->apic_id < MAX_LAPIC_ID && mp_ncpus < MAXCPU) { #ifdef SMP mp_ncpus++; mp_maxid = mp_ncpus - 1; #endif max_apic_id = max(max_apic_id, proc->apic_id); } break; } } static void mptable_setup_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; u_int *cpu_mask; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN) { lapic_create(proc->apic_id, proc->cpu_flags & PROCENTRY_FLAG_BP); if (proc->apic_id < MAX_LAPIC_ID) { cpu_mask = (u_int *)arg; *cpu_mask |= (1ul << proc->apic_id); } } break; } } static void mptable_count_items_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; mptable_nbusses++; if (bus->bus_id > mptable_maxbusid) mptable_maxbusid = bus->bus_id; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (apic->apic_flags & IOAPICENTRY_FLAG_EN) mptable_nioapics++; break; } } /* * Count items in the table. */ static void mptable_count_items(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { mptable_nioapics = 1; switch (mpfps->config_type) { case 1: case 2: case 3: case 4: mptable_nbusses = 1; break; case 5: case 6: case 7: mptable_nbusses = 2; break; default: panic("Unknown pre-defined MP Table config type %d", mpfps->config_type); } mptable_maxbusid = mptable_nbusses - 1; } else mptable_walk_table(mptable_count_items_handler, NULL); } /* * Add a bus or I/O APIC from an entry in the table. 
*/ static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; enum busTypes bus_type; int i; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; bus_type = lookup_bus_type(bus->bus_type); if (bus_type == UNKNOWN_BUSTYPE) { printf("MPTable: Unknown bus %d type \"", bus->bus_id); for (i = 0; i < 6; i++) printf("%c", bus->bus_type[i]); printf("\"\n"); } busses[bus->bus_id].bus_id = bus->bus_id; busses[bus->bus_id].bus_type = bus_type; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN)) break; if (apic->apic_id > IOAPIC_MAX_ID) panic("%s: I/O APIC ID %d too high", __func__, apic->apic_id); if (ioapics[apic->apic_id] != NULL) panic("%s: Double APIC ID %d", __func__, apic->apic_id); ioapics[apic->apic_id] = ioapic_create(apic->apic_address, apic->apic_id, -1); break; default: break; } } /* * Enumerate I/O APIC's and buses. */ static void mptable_parse_apics_and_busses(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { ioapics[2] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0); busses[0].bus_id = 0; busses[0].bus_type = default_data[mpfps->config_type - 1][2]; if (mptable_nbusses > 1) { busses[1].bus_id = 1; busses[1].bus_type = default_data[mpfps->config_type - 1][4]; } } else mptable_walk_table(mptable_parse_apics_and_busses_handler, NULL); } /* * Determine conforming polarity for a given bus type. */ static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: case EISA: return (INTR_POLARITY_HIGH); case PCI: return (INTR_POLARITY_LOW); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } /* * Determine conforming trigger for a given bus type. */ static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: if (elcr_found) return (elcr_read_trigger(src_bus_irq)); else return (INTR_TRIGGER_EDGE); case PCI: return (INTR_TRIGGER_LEVEL); case EISA: KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq)); KASSERT(elcr_found, ("Missing ELCR")); return (elcr_read_trigger(src_bus_irq)); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } static enum intr_polarity intentry_polarity(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) { case INTENTRY_FLAGS_POLARITY_CONFORM: return (conforming_polarity(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_POLARITY_ACTIVEHI: return (INTR_POLARITY_HIGH); case INTENTRY_FLAGS_POLARITY_ACTIVELO: return (INTR_POLARITY_LOW); default: panic("Bogus interrupt flags"); } } static enum intr_trigger intentry_trigger(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) { case INTENTRY_FLAGS_TRIGGER_CONFORM: return (conforming_trigger(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case INTENTRY_FLAGS_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); default: panic("Bogus interrupt flags"); } } /* * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC. 
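/*
 * Illustrative sketch, not from the FreeBSD sources: "conforming"
 * interrupt entries take the signalling that is customary for the
 * source bus, as conforming_polarity() and conforming_trigger() above
 * encode: ISA and EISA interrupts are active-high and, absent ELCR
 * data, edge triggered, while PCI interrupts are active-low and level
 * triggered.  Stand-alone lookup model:
 */
#include <stdio.h>

enum demo_bus { DEMO_ISA, DEMO_EISA, DEMO_PCI };

static const char *
demo_conforming(enum demo_bus bus)
{
	switch (bus) {
	case DEMO_ISA:
	case DEMO_EISA:
		return ("active-high, edge");
	case DEMO_PCI:
		return ("active-low, level");
	}
	return ("unknown");
}

int
main(void)
{
	printf("ISA: %s\n", demo_conforming(DEMO_ISA));
	printf("PCI: %s\n", demo_conforming(DEMO_PCI));
	return (0);
}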
*/ static void mptable_parse_io_int(int_entry_ptr intr) { void *ioapic; u_int pin, apic_id; apic_id = intr->dst_apic_id; if (intr->dst_apic_id == 0xff) { /* * An APIC ID of 0xff means that the interrupt is connected * to the specified pin on all I/O APICs in the system. If * there is only one I/O APIC, then use that APIC to route * the interrupts. If there is more than one I/O APIC, then * punt. */ if (mptable_nioapics == 1) { apic_id = 0; while (ioapics[apic_id] == NULL) apic_id++; } else { printf( "MPTable: Ignoring global interrupt entry for pin %d\n", intr->dst_apic_int); return; } } if (apic_id > IOAPIC_MAX_ID) { printf("MPTable: Ignoring interrupt entry for ioapic%d\n", intr->dst_apic_id); return; } ioapic = ioapics[apic_id]; if (ioapic == NULL) { printf( "MPTable: Ignoring interrupt entry for missing ioapic%d\n", apic_id); return; } pin = intr->dst_apic_int; switch (intr->int_type) { case INTENTRY_TYPE_INT: switch (busses[intr->src_bus_id].bus_type) { case NOBUS: panic("interrupt from missing bus"); case ISA: case EISA: if (busses[intr->src_bus_id].bus_type == ISA) ioapic_set_bus(ioapic, pin, APIC_BUS_ISA); else ioapic_set_bus(ioapic, pin, APIC_BUS_EISA); if (intr->src_bus_irq == pin) break; ioapic_remap_vector(ioapic, pin, intr->src_bus_irq); if (ioapic_get_vector(ioapic, intr->src_bus_irq) == intr->src_bus_irq) ioapic_disable_pin(ioapic, intr->src_bus_irq); break; case PCI: ioapic_set_bus(ioapic, pin, APIC_BUS_PCI); break; default: ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN); break; } break; case INTENTRY_TYPE_NMI: ioapic_set_nmi(ioapic, pin); break; case INTENTRY_TYPE_SMI: ioapic_set_smi(ioapic, pin); break; case INTENTRY_TYPE_EXTINT: ioapic_set_extint(ioapic, pin); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr)); if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) ioapic_set_polarity(ioapic, pin, intentry_polarity(intr)); } /* * Parse an interrupt entry for a local APIC LVT pin. */ static void mptable_parse_local_int(int_entry_ptr intr) { u_int apic_id, pin; if (intr->dst_apic_id == 0xff) apic_id = APIC_ID_ALL; else apic_id = intr->dst_apic_id; if (intr->dst_apic_int == 0) pin = APIC_LVT_LINT0; else pin = APIC_LVT_LINT1; switch (intr->int_type) { case INTENTRY_TYPE_INT: #if 1 printf( "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n", intr->dst_apic_int, intr->src_bus_irq); return; #else lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED); break; #endif case INTENTRY_TYPE_NMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); break; case INTENTRY_TYPE_SMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI); break; case INTENTRY_TYPE_EXTINT: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) lapic_set_lvt_triggermode(apic_id, pin, intentry_trigger(intr)); if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr)); } /* * Parse interrupt entries. 
*/ static void mptable_parse_ints_handler(u_char *entry, void *arg __unused) { int_entry_ptr intr; intr = (int_entry_ptr)entry; switch (*entry) { case MPCT_ENTRY_INT: mptable_parse_io_int(intr); break; case MPCT_ENTRY_LOCAL_INT: mptable_parse_local_int(intr); break; } } /* * Configure interrupt pins for a default configuration. For details see * Table 5-2 in Section 5 of the MP Table specification. */ static void mptable_parse_default_config_ints(void) { struct INTENTRY entry; int pin; /* * All default configs route IRQs from bus 0 to the first 16 pins * of the first I/O APIC with an APIC ID of 2. */ entry.type = MPCT_ENTRY_INT; entry.int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; entry.src_bus_id = 0; entry.dst_apic_id = 2; /* Run through all 16 pins. */ for (pin = 0; pin < 16; pin++) { entry.dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ entry.int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = 0; break; default: /* All other pins are identity mapped. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = pin; break; } mptable_parse_io_int(&entry); } /* Certain configs disable certain pins. */ if (mpfps->config_type == 7) ioapic_disable_pin(ioapics[2], 0); if (mpfps->config_type == 2) { ioapic_disable_pin(ioapics[2], 2); ioapic_disable_pin(ioapics[2], 13); } } /* * Configure the interrupt pins */ static void mptable_parse_ints(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { /* Configure LINT pins. */ lapic_set_lvt_mode(APIC_ID_ALL, APIC_LVT_LINT0, APIC_LVT_DM_EXTINT); lapic_set_lvt_mode(APIC_ID_ALL, APIC_LVT_LINT1, APIC_LVT_DM_NMI); /* Configure I/O APIC pins. */ mptable_parse_default_config_ints(); } else mptable_walk_table(mptable_parse_ints_handler, NULL); } #ifdef MPTABLE_FORCE_HTT /* * Perform a hyperthreading "fix-up" to enumerate any logical CPU's * that aren't already listed in the table. * * XXX: We assume that all of the physical CPUs in the * system have the same number of logical CPUs. * * XXX: We assume that APIC ID's are allocated such that * the APIC ID's for a physical processor are aligned * with the number of logical CPU's in the processor. */ static void mptable_hyperthread_fixup(u_int id_mask) { u_int i, id, logical_cpus; /* Nothing to do if there is no HTT support. */ if ((cpu_feature & CPUID_HTT) == 0) return; logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; if (logical_cpus <= 1) return; /* * For each APIC ID of a CPU that is set in the mask, * scan the other candidate APIC ID's for this * physical processor. If any of those ID's are * already in the table, then kill the fixup. */ for (id = 0; id <= MAX_LAPIC_ID; id++) { if ((id_mask & 1 << id) == 0) continue; /* First, make sure we are on a logical_cpus boundary. */ if (id % logical_cpus != 0) return; for (i = id + 1; i < id + logical_cpus; i++) if ((id_mask & 1 << i) != 0) return; } /* * Ok, the ID's checked out, so perform the fixup by * adding the logical CPUs. */ while ((id = ffs(id_mask)) != 0) { id--; for (i = id + 1; i < id + logical_cpus; i++) { if (bootverbose) printf( "MPTable: Adding logical CPU %d from main CPU %d\n", i, id); lapic_create(i, 0); } id_mask &= ~(1 << id); } } #endif /* MPTABLE_FORCE_HTT */ /* * Support code for routing PCI interrupts using the MP Table. */ static void mptable_pci_setup(void) { int i; /* * Find the first pci bus and call it 0. Panic if pci0 is not * bus zero and there are multiple PCI buses. 
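 *
 * An illustrative (hypothetical) layout: if the table lists an ISA
 * bus with ID 0 and a single PCI bus with ID 1, pci0 becomes 1 and
 * PCI bus N as seen by the OS maps to MP Table bus ID pci0 + N in
 * the lookups below.  A table listing PCI buses with IDs 1 and 2
 * would instead trip the panic, since pci0 != 0.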
*/ for (i = 0; i <= mptable_maxbusid; i++) if (busses[i].bus_type == PCI) { if (pci0 == -1) pci0 = i; else if (pci0 != 0) panic( "MPTable contains multiple PCI buses but no PCI bus 0"); } } static void mptable_pci_probe_table_handler(u_char *entry, void *arg) { struct pci_probe_table_args *args; int_entry_ptr intr; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_probe_table_args *)arg; KASSERT(args->bus <= mptable_maxbusid, ("bus %d is too big", args->bus)); KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus")); if (intr->src_bus_id == args->bus) args->found = 1; } int mptable_pci_probe_table(int bus) { struct pci_probe_table_args args; if (bus < 0) return (EINVAL); if (mpct == NULL || pci0 == -1 || pci0 + bus > mptable_maxbusid) return (ENXIO); if (busses[pci0 + bus].bus_type != PCI) return (ENXIO); args.bus = pci0 + bus; args.found = 0; mptable_walk_table(mptable_pci_probe_table_handler, &args); if (args.found == 0) return (ENXIO); return (0); } static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg) { struct pci_route_interrupt_args *args; int_entry_ptr intr; int vector; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_route_interrupt_args *)arg; if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq) return; /* Make sure the APIC maps to a known APIC. */ KASSERT(ioapics[intr->dst_apic_id] != NULL, ("No I/O APIC %d to route interrupt to", intr->dst_apic_id)); /* * Look up the vector for this APIC / pin combination. If we * have previously matched an entry for this PCI IRQ but it * has the same vector as this entry, just return. Otherwise, * we use the vector for this APIC / pin combination. */ vector = ioapic_get_vector(ioapics[intr->dst_apic_id], intr->dst_apic_int); if (args->vector == vector) return; KASSERT(args->vector == -1, ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n", args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector, vector)); args->vector = vector; } int mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) { struct pci_route_interrupt_args args; int slot; /* Like ACPI, pin numbers are 0-3, not 1-4. */ pin--; KASSERT(pci0 != -1, ("do not know how to route PCI interrupts")); args.bus = pci_get_bus(dev) + pci0; slot = pci_get_slot(dev); /* * PCI interrupt entries in the MP Table encode both the slot and * pin into the IRQ with the pin being the two least significant * bits, the slot being the next five bits, and the most significant * bit being reserved. */ args.irq = slot << 2 | pin; args.vector = -1; mptable_walk_table(mptable_pci_route_interrupt_handler, &args); if (args.vector < 0) { device_printf(pcib, "unable to route slot %d INT%c\n", slot, 'A' + pin); return (PCI_INVALID_IRQ); } if (bootverbose) device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot, 'A' + pin, args.vector); return (args.vector); } #ifdef NEW_PCIB struct host_res_args { struct mptable_hostb_softc *sc; device_t dev; u_char bus; }; /* * Initialize a Host-PCI bridge so it can restrict resource allocation * requests to the resources it actually decodes according to MP * config table extended entries. 
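 *
 * Two kinds of extended entries matter below: system address space
 * (SAS) entries, which list the I/O port and memory windows that the
 * bridge decodes, and compatibility bus address space modifier
 * (CBASM) entries, which add or subtract the predefined ISA and VGA
 * I/O ranges.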
*/ static void mptable_host_res_handler(ext_entry_ptr entry, void *arg) { struct host_res_args *args; cbasm_entry_ptr cbasm; sas_entry_ptr sas; const char *name; uint64_t start, end; int error, *flagp, flags, type; args = arg; switch (entry->type) { case MPCT_EXTENTRY_SAS: sas = (sas_entry_ptr)entry; if (sas->bus_id != args->bus) break; switch (sas->address_type) { case SASENTRY_TYPE_IO: type = SYS_RES_IOPORT; flags = 0; break; case SASENTRY_TYPE_MEMORY: type = SYS_RES_MEMORY; flags = 0; break; case SASENTRY_TYPE_PREFETCH: type = SYS_RES_MEMORY; flags = RF_PREFETCHABLE; break; default: printf( "MPTable: Unknown systems address space type for bus %u: %d\n", sas->bus_id, sas->address_type); return; } start = sas->address_base; end = sas->address_base + sas->address_length - 1; #ifdef __i386__ if (start > ULONG_MAX) { device_printf(args->dev, "Ignoring %d range above 4GB (%#jx-%#jx)\n", type, (uintmax_t)start, (uintmax_t)end); break; } if (end > ULONG_MAX) { device_printf(args->dev, "Truncating end of %d range above 4GB (%#jx-%#jx)\n", type, (uintmax_t)start, (uintmax_t)end); end = ULONG_MAX; } #endif error = pcib_host_res_decodes(&args->sc->sc_host_res, type, start, end, flags); if (error) panic("Failed to manage %d range (%#jx-%#jx): %d", type, (uintmax_t)start, (uintmax_t)end, error); break; case MPCT_EXTENTRY_CBASM: cbasm = (cbasm_entry_ptr)entry; if (cbasm->bus_id != args->bus) break; switch (cbasm->predefined_range) { case CBASMENTRY_RANGE_ISA_IO: flagp = &args->sc->sc_decodes_isa_io; name = "ISA I/O"; break; case CBASMENTRY_RANGE_VGA_IO: flagp = &args->sc->sc_decodes_vga_io; name = "VGA I/O"; break; default: printf( "MPTable: Unknown compatibility address space range for bus %u: %d\n", cbasm->bus_id, cbasm->predefined_range); return; } if (*flagp != 0) printf( "MPTable: Duplicate compatibility %s range for bus %u\n", name, cbasm->bus_id); switch (cbasm->address_mod) { case CBASMENTRY_ADDRESS_MOD_ADD: *flagp = 1; if (bootverbose) device_printf(args->dev, "decoding %s ports\n", name); break; case CBASMENTRY_ADDRESS_MOD_SUBTRACT: *flagp = -1; if (bootverbose) device_printf(args->dev, "not decoding %s ports\n", name); break; default: printf( "MPTable: Unknown compatibility address space modifier: %u\n", cbasm->address_mod); break; } break; } } void mptable_pci_host_res_init(device_t pcib) { struct host_res_args args; KASSERT(pci0 != -1, ("do not know how to map PCI bus IDs")); args.bus = pci_get_bus(pcib) + pci0; args.dev = pcib; args.sc = device_get_softc(pcib); if (pcib_host_res_init(pcib, &args.sc->sc_host_res) != 0) panic("failed to init hostb resources"); mptable_walk_extended_table(mptable_host_res_handler, &args); } #endif Index: head/sys/x86/x86/mptable_pci.c =================================================================== --- head/sys/x86/x86/mptable_pci.c (revision 344854) +++ head/sys/x86/x86/mptable_pci.c (revision 344855) @@ -1,242 +1,241 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Host to PCI and PCI to PCI bridge drivers that use the MP Table to route * interrupts from PCI devices to I/O APICs. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" /* Host to PCI bridge driver. */ static int mptable_hostb_probe(device_t dev) { if (pci_cfgregopen() == 0) return (ENXIO); if (mptable_pci_probe_table(pcib_get_bus(dev)) != 0) return (ENXIO); device_set_desc(dev, "MPTable Host-PCI bridge"); return (0); } static int mptable_hostb_attach(device_t dev) { #ifdef NEW_PCIB mptable_pci_host_res_init(dev); #endif device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } #ifdef NEW_PCIB static int mptable_is_isa_range(rman_res_t start, rman_res_t end) { if (end >= 0x10000) return (0); if ((start & 0xfc00) != (end & 0xfc00)) return (0); start &= ~0xfc00; end &= ~0xfc00; return (start >= 0x100 && end <= 0x3ff); } static int mptable_is_vga_range(rman_res_t start, rman_res_t end) { if (end >= 0x10000) return (0); if ((start & 0xfc00) != (end & 0xfc00)) return (0); start &= ~0xfc00; end &= ~0xfc00; return (pci_is_vga_ioport_range(start, end)); } static struct resource * mptable_hostb_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct mptable_hostb_softc *sc; #ifdef PCI_RES_BUS if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(0, child, rid, start, end, count, flags)); #endif sc = device_get_softc(dev); if (type == SYS_RES_IOPORT && start + count - 1 == end) { if (mptable_is_isa_range(start, end)) { switch (sc->sc_decodes_isa_io) { case -1: return (NULL); case 1: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); default: break; } } if (mptable_is_vga_range(start, end)) { switch (sc->sc_decodes_vga_io) { case -1: return (NULL); case 1: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); default: break; } } } start = hostb_alloc_start(type, start, end, count); return (pcib_host_res_alloc(&sc->sc_host_res, child, type, rid, start, end, count, flags)); } static int mptable_hostb_adjust_resource(device_t dev, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct mptable_hostb_softc *sc; #ifdef PCI_RES_BUS if (type == PCI_RES_BUS) return (pci_domain_adjust_bus(0, child, r, start, end)); #endif sc = device_get_softc(dev); return (pcib_host_res_adjust(&sc->sc_host_res, child, type, r, start, end)); } #endif static device_method_t mptable_hostb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mptable_hostb_probe), DEVMETHOD(device_attach, mptable_hostb_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), 
DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar), DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar), #ifdef NEW_PCIB DEVMETHOD(bus_alloc_resource, mptable_hostb_alloc_resource), DEVMETHOD(bus_adjust_resource, mptable_hostb_adjust_resource), #else DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource), DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), #endif #if defined(NEW_PCIB) && defined(PCI_RES_BUS) DEVMETHOD(bus_release_resource, legacy_pcib_release_resource), #else DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), DEVMETHOD_END }; static devclass_t hostb_devclass; DEFINE_CLASS_0(pcib, mptable_hostb_driver, mptable_hostb_methods, sizeof(struct mptable_hostb_softc)); DRIVER_MODULE(mptable_pcib, legacy, mptable_hostb_driver, hostb_devclass, 0, 0); /* PCI to PCI bridge driver. */ static int mptable_pcib_probe(device_t dev) { int bus; if ((pci_get_class(dev) != PCIC_BRIDGE) || (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) return (ENXIO); bus = pci_read_config(dev, PCIR_SECBUS_1, 1); if (bus == 0) return (ENXIO); if (mptable_pci_probe_table(bus) != 0) return (ENXIO); device_set_desc(dev, "MPTable PCI-PCI bridge"); return (-1000); } static device_method_t mptable_pcib_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mptable_pcib_probe), /* pcib interface */ DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), {0, 0} }; static devclass_t pcib_devclass; DEFINE_CLASS_1(pcib, mptable_pcib_driver, mptable_pcib_pci_methods, sizeof(struct pcib_softc), pcib_driver); DRIVER_MODULE(mptable_pcib, pci, mptable_pcib_driver, pcib_devclass, 0, 0); Index: head/sys/x86/xen/pvcpu_enum.c =================================================================== --- head/sys/x86/xen/pvcpu_enum.c (revision 344854) +++ head/sys/x86/xen/pvcpu_enum.c (revision 344855) @@ -1,264 +1,264 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2003 John Baldwin * Copyright (c) 2013 Roger Pau Monné * All rights reserved. + * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int xenpv_probe(void); static int xenpv_probe_cpus(void); static int xenpv_setup_local(void); static int xenpv_setup_io(void); static ACPI_TABLE_MADT *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; static struct apic_enumerator xenpv_enumerator = { .apic_name = "Xen PV", .apic_probe = xenpv_probe, .apic_probe_cpus = xenpv_probe_cpus, .apic_setup_local = xenpv_setup_local, .apic_setup_io = xenpv_setup_io }; /*--------------------- Helper functions to parse MADT -----------------------*/ /* * Parse an interrupt source override for an ISA interrupt. */ static void madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { enum intr_trigger trig; enum intr_polarity pol; int ret; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } madt_parse_interrupt_values(intr, &trig, &pol); /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->SourceIrq != intr->GlobalIrq && intr->GlobalIrq > 15 && intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup. */ acpi_OverrideInterruptLevel(intr->GlobalIrq); /* Register the IRQ with the polarity and trigger mode found. */ ret = xen_register_pirq(intr->GlobalIrq, trig, pol); if (ret != 0) panic("Unable to register interrupt override"); } /* * Call the handler routine for each entry in the MADT table. */ static void madt_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, handler, arg); } /* * Parse interrupt entries. 
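 *
 * Only ACPI_MADT_TYPE_INTERRUPT_OVERRIDE entries are acted on here;
 * every other MADT entry type is ignored by the handler below.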
*/ static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { if (entry->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) madt_parse_interrupt_override( (ACPI_MADT_INTERRUPT_OVERRIDE *)entry); } /*---------------------------- Xen PV enumerator -----------------------------*/ /* * This enumerator will only be registered on PVH */ static int xenpv_probe(void) { return (0); } /* * Test each possible vCPU in order to find the number of vCPUs */ static int xenpv_probe_cpus(void) { #ifdef SMP int i, ret; for (i = 0; i < MAXCPU && (i * 2) < MAX_APIC_ID; i++) { ret = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (ret >= 0) mp_ncpus = min(mp_ncpus + 1, MAXCPU); } mp_maxid = mp_ncpus - 1; max_apic_id = mp_ncpus * 2; #endif return (0); } /* * Initialize the vCPU id of the BSP */ static int xenpv_setup_local(void) { #ifdef SMP int i, ret; for (i = 0; i < MAXCPU && (i * 2) < MAX_APIC_ID; i++) { ret = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (ret >= 0) lapic_create((i * 2), (i == 0)); } #endif PCPU_SET(vcpu_id, 0); lapic_init(0); return (0); } /* * On PVH guests there's no IO APIC */ static int xenpv_setup_io(void) { if (xen_initial_domain()) { /* * NB: we could iterate over the MADT IOAPIC entries in order * to figure out the exact number of IOAPIC interrupts, but * this is legacy code so just keep using the previous * behaviour and assume a maximum of 256 interrupts. */ num_io_irqs = max(255, num_io_irqs); acpi_SetDefaultIntrModel(ACPI_INTR_APIC); } return (0); } void xenpv_register_pirqs(struct pic *pic __unused) { unsigned int i; int ret; /* Map MADT */ madt_physaddr = acpi_find_table(ACPI_SIG_MADT); madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT); madt_length = madt->Header.Length; /* Try to initialize ACPI so that we can access the FADT. */ ret = acpi_Startup(); if (ACPI_FAILURE(ret)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(ret)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } /* Run through the table to see if there are any overrides. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ret = xen_register_pirq(AcpiGbl_FADT.SciInterrupt, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); if (ret != 0) panic("Unable to register SCI IRQ"); } /* Register legacy ISA IRQs */ for (i = 1; i < 16; i++) { if (intr_lookup_source(i) != NULL) continue; ret = xen_register_pirq(i, INTR_TRIGGER_EDGE, INTR_POLARITY_LOW); if (ret != 0 && bootverbose) printf("Unable to register legacy IRQ#%u: %d\n", i, ret); } } static void xenpv_register(void *dummy __unused) { if (xen_pv_domain()) { apic_register_enumerator(&xenpv_enumerator); } } SYSINIT(xenpv_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, xenpv_register, NULL); Index: head/tests/sys/capsicum/ioctls_test.c =================================================================== --- head/tests/sys/capsicum/ioctls_test.c (revision 344854) +++ head/tests/sys/capsicum/ioctls_test.c (revision 344855) @@ -1,127 +1,126 @@ /*- * Copyright (c) 2018 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include /* * A variant of ATF_REQUIRE that is suitable for use in child * processes. This only works if the parent process is tripped up by * the early exit and fails some requirement itself. */ #define CHILD_REQUIRE(exp) do { \ if (!(exp)) \ child_fail_require(__FILE__, __LINE__, \ #exp " not met"); \ } while (0) static __dead2 void child_fail_require(const char *file, int line, const char *str) { char buf[128]; snprintf(buf, sizeof(buf), "%s:%d: %s\n", file, line, str); write(2, buf, strlen(buf)); _exit(32); } /* * Exercise the edge case of a custom ioctl list being copied from a * listen socket to an accepted socket. */ ATF_TC_WITHOUT_HEAD(cap_ioctls__listen_copy); ATF_TC_BODY(cap_ioctls__listen_copy, tc) { struct sockaddr_in sin; cap_rights_t rights; u_long cmds[] = { FIONREAD }; socklen_t len; pid_t pid; char dummy; int s[2], status; s[0] = socket(AF_INET, SOCK_STREAM, 0); ATF_REQUIRE(s[0] > 0); /* Bind to an arbitrary unused port. */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_port = 0; sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); ATF_REQUIRE(bind(s[0], (struct sockaddr *)&sin, sizeof(sin)) == 0); ATF_REQUIRE(listen(s[0], 1) == 0); len = sizeof(sin); ATF_REQUIRE(getsockname(s[0], (struct sockaddr *)&sin, &len) == 0); ATF_REQUIRE(len == sizeof(sin)); cap_rights_init(&rights, CAP_ACCEPT, CAP_IOCTL); ATF_REQUIRE(cap_rights_limit(s[0], &rights) == 0); ATF_REQUIRE(cap_ioctls_limit(s[0], cmds, nitems(cmds)) == 0); pid = fork(); if (pid == 0) { s[1] = accept(s[0], NULL, NULL); CHILD_REQUIRE(s[1] > 0); /* Close both sockets during exit(). */ exit(0); } ATF_REQUIRE(pid > 0); ATF_REQUIRE(close(s[0]) == 0); s[1] = socket(AF_INET, SOCK_STREAM, 0); ATF_REQUIRE(s[1] > 0); ATF_REQUIRE(connect(s[1], (struct sockaddr *)&sin, sizeof(sin)) == 0); ATF_REQUIRE(read(s[1], &dummy, sizeof(dummy)) == 0); ATF_REQUIRE(close(s[1]) == 0); ATF_REQUIRE(wait(&status) == pid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, cap_ioctls__listen_copy); return (atf_no_error()); } Index: head/tests/sys/kern/ptrace_test.c =================================================================== --- head/tests/sys/kern/ptrace_test.c (revision 344854) +++ head/tests/sys/kern/ptrace_test.c (revision 344855) @@ -1,3909 +1,3908 @@ /*- * Copyright (c) 2015 John Baldwin - * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Architectures with a user-visible breakpoint(). */ #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) || defined(__riscv) || \ defined(__sparc64__) #define HAVE_BREAKPOINT #endif /* * Adjust PC to skip over a breakpoint when stopped for a breakpoint trap. */ #ifdef HAVE_BREAKPOINT #if defined(__aarch64__) #define SKIP_BREAK(reg) ((reg)->elr += 4) #elif defined(__amd64__) || defined(__i386__) #define SKIP_BREAK(reg) #elif defined(__arm__) #define SKIP_BREAK(reg) ((reg)->r_pc += 4) #elif defined(__mips__) #define SKIP_BREAK(reg) ((reg)->r_regs[PC] += 4) #elif defined(__riscv) #define SKIP_BREAK(reg) ((reg)->sepc += 4) #elif defined(__sparc64__) #define SKIP_BREAK(reg) do { \ (reg)->r_tpc = (reg)->r_tnpc + 4; \ (reg)->r_tnpc += 8; \ } while (0) #endif #endif /* * A variant of ATF_REQUIRE that is suitable for use in child * processes. This only works if the parent process is tripped up by * the early exit and fails some requirement itself. */ #define CHILD_REQUIRE(exp) do { \ if (!(exp)) \ child_fail_require(__FILE__, __LINE__, \ #exp " not met"); \ } while (0) static __dead2 void child_fail_require(const char *file, int line, const char *str) { char buf[128]; snprintf(buf, sizeof(buf), "%s:%d: %s\n", file, line, str); write(2, buf, strlen(buf)); _exit(32); } static void trace_me(void) { /* Attach the parent process as a tracer of this process. */ CHILD_REQUIRE(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); /* Trigger a stop. */ raise(SIGSTOP); } static void attach_child(pid_t pid) { pid_t wpid; int status; ATF_REQUIRE(ptrace(PT_ATTACH, pid, NULL, 0) == 0); wpid = waitpid(pid, &status, 0); ATF_REQUIRE(wpid == pid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); } static void wait_for_zombie(pid_t pid) { /* * Wait for a process to exit. This is kind of gross, but * there is not a better way. * * Prior to r325719, the kern.proc.pid. sysctl failed * with ESRCH. 
After that change, a valid struct kinfo_proc * is returned for zombies with ki_stat set to SZOMB. */ for (;;) { struct kinfo_proc kp; size_t len; int mib[4]; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_PID; mib[3] = pid; len = sizeof(kp); if (sysctl(mib, nitems(mib), &kp, &len, NULL, 0) == -1) { ATF_REQUIRE(errno == ESRCH); break; } if (kp.ki_stat == SZOMB) break; usleep(5000); } } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_TRACE_ME. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_trace_me); ATF_TC_BODY(ptrace__parent_wait_after_trace_me, tc) { pid_t child, wpid; int status; ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ trace_me(); _exit(1); } /* Parent process. */ /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_ATTACH. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_attach); ATF_TC_BODY(ptrace__parent_wait_after_attach, tc) { pid_t child, wpid; int cpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for the parent to attach. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == 0); _exit(1); } close(cpipe[1]); /* Parent process. */ /* Attach to the child process. */ attach_child(child); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal the child to exit. */ close(cpipe[0]); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent process "sees" the exit of a debugged process only * after the debugger has seen it. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_child_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_child_debugger, tc) { pid_t child, debugger, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. 
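 *
 * (These tests signal over pipes in two ways: writing a byte, which
 * the peer sees as a read of sizeof(c), and closing an end, which
 * the peer sees as a read returning 0 at EOF.)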
*/ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == 0); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ close(dpipe[0]); /* Wait for the debugger. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that a parent process "sees" the exit of a debugged process * only after a non-direct-child debugger has seen it. In particular, * various wait() calls in the parent must avoid failing with ESRCH by * checking the parent's orphan list for the debugee. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_unrelated_debugger, tc) { pid_t child, debugger, fpid, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger parent. */ /* * Fork again and drop the debugger parent so that the * debugger is not a child of the main parent. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(2); /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == sizeof(c)); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger parent process to exit. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* A WNOHANG wait here should see the non-exited child. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. 
*/ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ ATF_REQUIRE(write(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Wait for the debugger. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == 0); close(dpipe[0]); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * The parent process should always act the same regardless of how the * debugger is attached to it. */ static __dead2 void follow_fork_parent(bool use_vfork) { pid_t fpid, wpid; int status; if (use_vfork) CHILD_REQUIRE((fpid = vfork()) != -1); else CHILD_REQUIRE((fpid = fork()) != -1); if (fpid == 0) /* Child */ _exit(2); wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 2); _exit(1); } /* * Helper routine for follow fork tests. This waits for two stops * that report both "sides" of a fork. It returns the pid of the new * child process. */ static pid_t handle_fork_events(pid_t parent, struct ptrace_lwpinfo *ppl) { struct ptrace_lwpinfo pl; bool fork_reported[2]; pid_t child, wpid; int i, status; fork_reported[0] = false; fork_reported[1] = false; child = -1; /* * Each process should report a fork event. The parent should * report a PL_FLAG_FORKED event, and the child should report * a PL_FLAG_CHILD event. */ for (i = 0; i < 2; i++) { wpid = wait(&status); ATF_REQUIRE(wpid > 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != 0); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != (PL_FLAG_FORKED | PL_FLAG_CHILD)); if (pl.pl_flags & PL_FLAG_CHILD) { ATF_REQUIRE(wpid != parent); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(!fork_reported[1]); if (child == -1) child = wpid; else ATF_REQUIRE(child == wpid); if (ppl != NULL) ppl[1] = pl; fork_reported[1] = true; } else { ATF_REQUIRE(wpid == parent); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(!fork_reported[0]); if (child == -1) child = pl.pl_child_pid; else ATF_REQUIRE(child == pl.pl_child_pid); if (ppl != NULL) ppl[0] = pl; fork_reported[0] = true; } } return (child); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached); ATF_TC_BODY(ptrace__follow_fork_both_attached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. 
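 *
 * A note on the PT_CONTINUE arguments used throughout these tests:
 * an addr of (caddr_t)1 resumes the process at the point where it
 * stopped, and a data value of 0 delivers no signal.  A hypothetical
 * call that resumes the process and delivers SIGUSR1 instead would
 * look like:
 *
 *	ptrace(PT_CONTINUE, pid, (caddr_t)1, SIGUSR1);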
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached); ATF_TC_BODY(ptrace__follow_fork_child_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the grandchild now, only the * child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached); ATF_TC_BODY(ptrace__follow_fork_parent_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. * * Even though the child process is detached, it is still a * child of the debugger, so it will still report its exit * after the grandchild. 
*/ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void attach_fork_parent(int cpipe[2]) { pid_t fpid; close(cpipe[0]); /* Double-fork to disassociate from the debugger. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(3); /* Send the pid of the disassociated child to the debugger. */ fpid = getpid(); CHILD_REQUIRE(write(cpipe[1], &fpid, sizeof(fpid)) == sizeof(fpid)); /* Wait for the debugger to attach. */ CHILD_REQUIRE(read(cpipe[1], &fpid, sizeof(fpid)) == 0); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_both_attached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The fork parent can't exit until the child reports status, * so the child should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_child_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. 
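 *
 * The direct child here is the intermediate process from the double
 * fork in attach_fork_parent(); it exits with status 3, leaving the
 * fork parent no longer a child of this test process.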
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the child now, only the fork * parent. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. In this test the parent * that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_parent_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the fork parent now, only * the child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a child process does not see an unrelated debugger as its * parent but sees its original parent process. */ ATF_TC_WITHOUT_HEAD(ptrace__getppid); ATF_TC_BODY(ptrace__getppid, tc) { pid_t child, debugger, ppid, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); /* Report the parent PID to the parent. 
*/ ppid = getppid(); CHILD_REQUIRE(write(cpipe[1], &ppid, sizeof(ppid)) == sizeof(ppid)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for traced child to exit. */ wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); /* Read the parent PID from the child. */ ATF_REQUIRE(read(cpipe[0], &ppid, sizeof(ppid)) == sizeof(ppid)); close(cpipe[0]); ATF_REQUIRE(ppid == getpid()); /* Wait for the debugger. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via fork() reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_fork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_fork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_fork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via vfork() reports the correct value. 
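 *
 * This mirrors the fork() test above except that
 * follow_fork_parent(true) uses vfork(), so both halves of the fork
 * event should report a syscall code of SYS_vfork.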
*/ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_vfork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_vfork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_vfork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * simple_thread(void *arg __unused) { pthread_exit(NULL); } static __dead2 void simple_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, simple_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(thread, NULL) == 0); exit(1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * thread reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_thread); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t mainlwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); mainlwp = pl.pl_lwpid; /* * Continue the child ignoring the SIGSTOP and tracing all * system call exits. */ ATF_REQUIRE(ptrace(PT_TO_SCX, fpid, (caddr_t)1, 0) != -1); /* * Wait for the new thread to arrive. pthread_create() might * invoke any number of system calls. For now we just wait * for the new thread to arrive and make sure it reports a * valid system call code. If ptrace grows thread event * reporting then this test can be made more precise. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl.pl_syscall_code != 0); if (pl.pl_lwpid != mainlwp) /* New thread seen. 
*/ break; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Wait for the child to exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFEXITED(status)) break; ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected LWP events are reported for a child thread. */ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events); ATF_TC_BODY(ptrace__lwp_events, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child thread's death. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED | PL_FLAG_SCE)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * exec_thread(void *arg __unused) { execl("/usr/bin/true", "true", NULL); exit(127); } static __dead2 void exec_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, exec_thread, NULL) == 0); for (;;) sleep(60); exit(1); } /* * Verify that the expected LWP events are reported for a multithreaded * process that calls execve(2). */ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events_exec); ATF_TC_BODY(ptrace__lwp_events_exec, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. 
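 *
 * (Aside on the ptrace(2) calling convention used throughout: for
 * PT_CONTINUE and its relatives, an address argument of (caddr_t)1 means
 * "resume where the tracee stopped", and the data argument names a
 * signal to deliver on resume, 0 meaning none.  For example:
 *
 *	ptrace(PT_CONTINUE, pid, (caddr_t)1, 0);         resume quietly
 *	ptrace(PT_CONTINUE, pid, (caddr_t)1, SIGUSR1);   resume + signal
 * )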
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next event should be for the main thread's death due to * single threading from execve(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED)); ATF_REQUIRE(pl.pl_lwpid == lwps[0]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void handler(int sig __unused) { } static void signal_main(void) { signal(SIGINFO, handler); raise(SIGINFO); exit(0); } /* * Verify that the expected ptrace event is reported for a signal. */ ATF_TC_WITHOUT_HEAD(ptrace__siginfo); ATF_TC_BODY(ptrace__siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the SIGINFO. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINFO); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_event == PL_EVENT_SIGNAL); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_code == SI_LWP); ATF_REQUIRE(pl.pl_siginfo.si_pid == wpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_EXEC. */ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_disable); ATF_TC_BODY(ptrace__ptrace_exec_disable, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. 
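 *
 * (Aside: the SIGSTOP being waited for here comes from the trace_me()
 * helper defined earlier in this file.  The conventional shape of such a
 * helper, sketched here for reference rather than quoted, is to mark the
 * process traced and then stop itself so the tracer can configure events
 * before anything interesting runs:
 *
 *	static void
 *	trace_me(void)
 *	{
 *		CHILD_REQUIRE(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
 *		raise(SIGSTOP);
 *	}
 * )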
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = 0; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_enable); ATF_TC_BODY(ptrace__ptrace_exec_enable, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = PTRACE_EXEC; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__event_mask); ATF_TC_BODY(ptrace__event_mask, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(0); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* PT_FOLLOW_FORK should toggle the state of PTRACE_FORK. */ ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_FORK); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_FORK)); /* PT_LWP_EVENTS should toggle the state of PTRACE_LWP. */ ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_LWP); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_LWP)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_VFORK. 
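 *
 * (Aside: PT_GET_EVENT_MASK/PT_SET_EVENT_MASK operate on a plain int
 * bitmask, so the tests below add PTRACE_VFORK with a read-modify-write
 * rather than a blind store, preserving events that are already enabled:
 *
 *	int events;
 *	ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&events,
 *	    sizeof(events)) == 0);
 *	events |= PTRACE_VFORK;
 *	ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&events,
 *	    sizeof(events)) == 0);
 * )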
*/ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork); ATF_TC_BODY(ptrace__ptrace_vfork, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); /* The next event should report the end of the vfork. */ wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork_follow); ATF_TC_BODY(ptrace__ptrace_vfork_follow, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_FORK | PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORKED) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = waitpid(children[1], &status, 0); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* * The child should report its vfork() completion before it * exits. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl[0], sizeof(pl[0])) != -1); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped due to a breakpoint trap. 
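 *
 * (Aside: a breakpoint stop is reported as a SIGTRAP like any other
 * trace stop; the reliable discriminator is the si_code carried in the
 * lwpinfo siginfo, as ptrace__breakpoint_siginfo later in this file
 * verifies.  A hypothetical check, where handle_breakpoint() is an
 * invented name:
 *
 *	ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl,
 *	    sizeof(pl)) != -1);
 *	if ((pl.pl_flags & PL_FLAG_SI) != 0 &&
 *	    pl.pl_siginfo.si_code == TRAP_BRKPT)
 *		handle_breakpoint(wpid);
 * )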
*/ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_breakpoint); ATF_TC_BODY(ptrace__PT_KILL_breakpoint, tc) { pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped inside of a system call. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_system_call); ATF_TC_BODY(ptrace__PT_KILL_system_call, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that no more events are reported after PT_KILL except for the * process exit when killing a multithreaded process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_threads); ATF_TC_BODY(ptrace__PT_KILL_threads, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t main_lwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != main_lwp); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * mask_usr1_thread(void *arg) { pthread_barrier_t *pbarrier; sigset_t sigmask; pbarrier = (pthread_barrier_t*)arg; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR1); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(pbarrier); for (;;) sleep(60); return (NULL); } /* * Verify that the SIGKILL from PT_KILL takes priority over other signals * and prevents spurious stops due to those other signals. */ ATF_TC(ptrace__PT_KILL_competing_signal); ATF_TC_HEAD(ptrace__PT_KILL_competing_signal, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_signal, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; struct sched_param sched_param; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); trace_me(); for (;;) sleep(60); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Send a signal that only the first thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not the SIGUSR signal). 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the SIGKILL from PT_KILL takes priority over other stop events * and prevents spurious stops caused by those events. */ ATF_TC(ptrace__PT_KILL_competing_stop); ATF_TC_HEAD(ptrace__PT_KILL_competing_stop, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_stop, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; lwpid_t main_lwp; struct ptrace_lwpinfo pl; struct sched_param sched_param; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); /* Sync up with the test before doing the getpid(). */ raise(SIGSTOP); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * Continue until child is done with setup, which is indicated with * SIGSTOP. Ignore system calls in the meantime. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); } else { ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); break; } ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); } /* Proceed, allowing main thread to hit syscall entry for getpid(). */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid == main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Prevent the main thread from hitting its syscall exit for now. */ ATF_REQUIRE(ptrace(PT_SUSPEND, main_lwp, 0, 0) == 0); /* * Proceed, allowing second thread to hit syscall exit for * pthread_barrier_wait(). 
*/ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid != main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Allow the main thread to try to finish its system call. */ ATF_REQUIRE(ptrace(PT_RESUME, main_lwp, 0, 0) == 0); /* * At this point, the main thread is in the middle of a system call and * has been resumed. The second thread has taken a SIGUSR2 which will * be replaced with a SIGKILL below. The main thread will get to run * first. It should notice the kill request (even though the signal * replacement occurred in the other thread) and exit accordingly. It * should not stop for the system call exit event. */ /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not a syscall exit). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); _exit(2); } /* * Verify that even if the signal queue is full for a child process, * a PT_KILL will kill the process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_KILL_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that when stopped at a system call entry, a signal can be * requested with PT_CONTINUE which will be delivered once the system * call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
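 *
 * (Aside: every status collected in these tests is decoded with the
 * standard <sys/wait.h> macros; at most one of the predicates below is
 * true for a given status.  A generic decoder sketch, not part of the
 * original file:
 *
 *	if (WIFSTOPPED(status))
 *		sig = WSTOPSIG(status);       tracee stopped by a signal
 *	else if (WIFEXITED(status))
 *		code = WEXITSTATUS(status);   normal exit status
 *	else if (WIFSIGNALED(status))
 *		sig = WTERMSIG(status);       terminated by a signal
 * )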
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_counting_handler(int sig) { static int counter = 0; CHILD_REQUIRE(sig == SIGUSR1); counter++; if (counter == 2) _exit(2); } /* * Verify that, when continuing from a stop at system call entry and exit, * a signal can be requested from both stops, and both will be delivered when * the system call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The third wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that even if the signal queue is full for a child process, * a PT_CONTINUE with a signal will not result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status)) { ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { /* * The last wait() should report normal _exit from the * SIGUSR1 handler. */ ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static sem_t sigusr1_sem; static int got_usr1; static void sigusr1_sempost_handler(int sig __unused) { got_usr1++; CHILD_REQUIRE(sem_post(&sigusr1_sem) == 0); } /* * Verify that even if the signal queue is full for a child process, * and the signal is masked, a PT_CONTINUE with a signal will not * result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; int max_pending_per_proc; size_t len; int i; sigset_t sigmask; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive the SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* Collect and ignore all of the SIGUSR2. */ for (i = 0; i < max_pending_per_proc; ++i) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Now our PT_CONTINUE'd SIGUSR1 should cause a stop after unmask. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report exit after receiving SIGUSR1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that, after stopping due to a signal, that signal can be * replaced with another signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_change_sig); ATF_TC_BODY(ptrace__PT_CONTINUE_change_sig, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); sleep(20); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal without ptrace. */ ATF_REQUIRE(kill(fpid, SIGINT) == 0); /* The second wait() should report a SIGINT was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGINT); /* Continue the child process with a different signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTERM) == 0); /* * The last wait() should report having died due to the new * signal, SIGTERM. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTERM); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a signal can be passed through to the child even when there * was no true signal originally. Such cases arise when a SIGTRAP is * invented for, e.g., system call stops. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry, tc) { struct ptrace_lwpinfo pl; struct rlimit rl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* SIGTRAP expected to cause exit on syscall entry. 
*/ rl.rlim_cur = rl.rlim_max = 0; ATF_REQUIRE(setrlimit(RLIMIT_CORE, &rl) == 0); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a SIGTRAP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTRAP) == 0); for (;;) { /* * The last wait() should report exit due to SIGTRAP. In the * meantime, catch and proceed past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTRAP); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * A mixed bag PT_CONTINUE with signal test. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_mix); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_mix, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue with the first SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send an ABRT without ptrace. */ ATF_REQUIRE(kill(fpid, SIGABRT) == 0); /* Continue normally. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGABRT. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue, replacing the SIGABRT with another SIGUSR1. 
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a signal delivered by ptrace is noticed by kevent(2). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_kqueue, tc) { pid_t fpid, wpid; int status, kq, nevents; struct kevent kev; ATF_REQUIRE(signal(SIGUSR1, SIG_IGN) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE((kq = kqueue()) > 0); EV_SET(&kev, SIGUSR1, EVFILT_SIGNAL, EV_ADD, 0, 0, 0); CHILD_REQUIRE(kevent(kq, &kev, 1, NULL, 0, NULL) == 0); trace_me(); for (;;) { nevents = kevent(kq, NULL, 0, &kev, 1, NULL); if (nevents == -1 && errno == EINTR) continue; CHILD_REQUIRE(nevents > 0); CHILD_REQUIRE(kev.filter == EVFILT_SIGNAL); CHILD_REQUIRE(kev.ident == SIGUSR1); break; } exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue with the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* * The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * signal_thread(void *arg) { int err; sigset_t sigmask; pthread_barrier_t *pbarrier = (pthread_barrier_t*)arg; /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free our companion thread from the barrier. */ pthread_barrier_wait(pbarrier); /* * Swap ignore duties; the next SIGUSR1 should go to the * other thread. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(pbarrier); /* Wait until our companion has received its SIGUSR1. */ pthread_barrier_wait(pbarrier); return (NULL); } /* * Verify that a traced process with a blocked signal receives the * signal from kill() once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__killed_with_sigmask); ATF_TC_BODY(ptrace__killed_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. 
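 *
 * (Aside, hedged: while SIGUSR1 is blocked, the kill(2) issued by the
 * test merely marks it pending; delivery, and hence the ptrace stop,
 * happens at the sigprocmask(SIG_UNBLOCK) below.  A hypothetical way for
 * the child to observe the pending-but-undelivered state at this point:
 *
 *	sigset_t pending;
 *	CHILD_REQUIRE(sigpending(&pending) == 0);
 *	CHILD_REQUIRE(sigismember(&pending, SIGUSR1) == 1);
 * )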
*/ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Send a blocked SIGUSR1, which should cause a stop. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report that the kill(SIGUSR1) was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, allowing in the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a traced process with a blocked signal receives the * signal from PT_CONTINUE once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Continue the child replacing SIGSTOP with SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report that the SIGUSR1 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report normal exit with code 1. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that if ptrace stops due to a signal but continues with * a different signal that the new signal is routed to a thread * that can accept it, and that the thread is awakened by the signal * in a timely manner. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_thread_sigmask, tc) { pid_t fpid, wpid; int status, err; pthread_t t; sigset_t sigmask; pthread_barrier_t barrier; ATF_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(pthread_create(&t, NULL, signal_thread, (void*)&barrier) == 0); /* The other thread should receive the first SIGUSR1. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); /* Wait until other thread has received its SIGUSR1. */ pthread_barrier_wait(&barrier); /* * Swap ignore duties; the next SIGUSR1 should go to this * thread. */ CHILD_REQUIRE(pthread_sigmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(&barrier); /* * Sync up with test code; we're ready for the next SIGUSR1 * now. */ raise(SIGSTOP); /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free the other thread from the barrier. */ pthread_barrier_wait(&barrier); CHILD_REQUIRE(pthread_join(t, NULL) == 0); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * Send a signal without ptrace that either thread will accept (USR2, * in this case). */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The next wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * raise_sigstop_thread(void *arg __unused) { raise(SIGSTOP); return NULL; } static void * sleep_thread(void *arg __unused) { sleep(60); return NULL; } static void terminate_with_pending_sigstop(bool sigstop_from_main_thread) { pid_t fpid, wpid; int status, i; cpuset_t setmask; cpusetid_t setid; pthread_t t; /* * Become the reaper for this process tree. We need to be able to check * that both child and grandchild have died. */ ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) == 0); fpid = fork(); ATF_REQUIRE(fpid >= 0); if (fpid == 0) { fpid = fork(); CHILD_REQUIRE(fpid >= 0); if (fpid == 0) { trace_me(); /* Pin to CPU 0 to serialize thread execution. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); if (sigstop_from_main_thread) { /* * We expect the SIGKILL sent when our parent * dies to be delivered to the new thread. * Raise the SIGSTOP in this thread so the * threads compete. */ CHILD_REQUIRE(pthread_create(&t, NULL, sleep_thread, NULL) == 0); raise(SIGSTOP); } else { /* * We expect the SIGKILL to be delivered to * this thread. After creating the new thread, * just get off the CPU so the other thread can * raise the SIGSTOP. */ CHILD_REQUIRE(pthread_create(&t, NULL, raise_sigstop_thread, NULL) == 0); sleep(60); } exit(0); } /* First stop is trace_me() immediately after fork. */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Second stop is from the raise(SIGSTOP). */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* * Terminate tracing process without detaching. Our child * should be killed. */ exit(0); } /* * We should get a normal exit from our immediate child and a SIGKILL * exit from our grandchild. The latter case is the interesting one. * Our grandchild should not have stopped due to the SIGSTOP that was * left dangling when its parent died. */ for (i = 0; i < 2; ++i) { wpid = wait(&status); if (wpid == fpid) ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); else ATF_REQUIRE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL); } } /* * These two tests ensure that if the tracing process exits without detaching * just after the child received a SIGSTOP, the child is cleanly killed and * doesn't go to sleep due to the SIGSTOP. The parent's death will send a * SIGKILL to the child. If the SIGKILL and the SIGSTOP are handled by * different threads, the SIGKILL must win. There are two variants of this * test, designed to catch the case where the SIGKILL is delivered to the * younger thread (the first test) and the case where the SIGKILL is delivered * to the older thread (the second test). This behavior has changed in the * past, so make no assumption. 
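 *
 * (Aside: the terminate_with_pending_sigstop() helper relies on the
 * procctl(2) reaper facility so that the orphaned grandchild is
 * reparented to the test process instead of to init, which is what makes
 * its SIGKILL exit status collectable with wait(2).  The idiom in
 * isolation, as a hedged sketch:
 *
 *	ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE,
 *	    NULL) == 0);
 *	while (wait(&status) != -1)
 *		;       reaps children and orphaned descendants alike
 * )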
*/ ATF_TC(ptrace__parent_terminate_with_pending_sigstop1); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop1, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop1, tc) { terminate_with_pending_sigstop(true); } ATF_TC(ptrace__parent_terminate_with_pending_sigstop2); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop2, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop2, tc) { terminate_with_pending_sigstop(false); } /* * Verify that after ptrace() discards a SIGKILL signal, the event mask * is not modified. */ ATF_TC_WITHOUT_HEAD(ptrace__event_mask_sigkill_discard); ATF_TC_BODY(ptrace__event_mask_sigkill_discard, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, event_mask, new_event_mask; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGSTOP); exit(0); } /* The first wait() should report the stop from trace_me(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Set several unobtrusive event bits. */ event_mask = PTRACE_EXEC | PTRACE_FORK | PTRACE_LWP; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, wpid, (caddr_t)&event_mask, sizeof(event_mask)) == 0); /* Send a SIGKILL without using ptrace. */ ATF_REQUIRE(kill(fpid, SIGKILL) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should be due to the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGKILL); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGKILL); /* Continue the child ignoring the SIGKILL. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Check the current event mask. It should not have changed. */ new_event_mask = 0; ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, wpid, (caddr_t)&new_event_mask, sizeof(new_event_mask)) == 0); ATF_REQUIRE(event_mask == new_event_mask); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * flock_thread(void *arg) { int fd; fd = *(int *)arg; (void)flock(fd, LOCK_EX); (void)flock(fd, LOCK_UN); return (NULL); } /* * Verify that PT_ATTACH will suspend threads sleeping in an SBDRY section. * We rely on the fact that the lockf implementation sets SBDRY before blocking * on a lock. This is a regression test for r318191. 
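 *
 * (Aside: once an attach completes, a tracer can enumerate the stopped
 * process's threads with PT_GETNUMLWPS and PT_GETLWPLIST; a hypothetical
 * sketch, assuming child is already stopped:
 *
 *	int n;
 *	lwpid_t *lwps;
 *	n = ptrace(PT_GETNUMLWPS, child, NULL, 0);
 *	ATF_REQUIRE(n > 0);
 *	lwps = calloc(n, sizeof(*lwps));
 *	ATF_REQUIRE(lwps != NULL);
 *	ATF_REQUIRE(ptrace(PT_GETLWPLIST, child, (caddr_t)lwps, n) == n);
 *	free(lwps);
 * )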
*/ ATF_TC_WITHOUT_HEAD(ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TC_BODY(ptrace__PT_ATTACH_with_SBDRY_thread, tc) { pthread_barrier_t barrier; pthread_barrierattr_t battr; char tmpfile[64]; pid_t child, wpid; int error, fd, i, status; ATF_REQUIRE(pthread_barrierattr_init(&battr) == 0); ATF_REQUIRE(pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_SHARED) == 0); ATF_REQUIRE(pthread_barrier_init(&barrier, &battr, 2) == 0); (void)snprintf(tmpfile, sizeof(tmpfile), "./ptrace.XXXXXX"); fd = mkstemp(tmpfile); ATF_REQUIRE(fd >= 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { pthread_t t[2]; int cfd; error = pthread_barrier_wait(&barrier); if (error != 0 && error != PTHREAD_BARRIER_SERIAL_THREAD) _exit(1); cfd = open(tmpfile, O_RDONLY); if (cfd < 0) _exit(1); /* * We want at least two threads blocked on the file lock since * the SIGSTOP from PT_ATTACH may kick one of them out of * sleep. */ if (pthread_create(&t[0], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_create(&t[1], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_join(t[0], NULL) != 0) _exit(1); if (pthread_join(t[1], NULL) != 0) _exit(1); _exit(0); } ATF_REQUIRE(flock(fd, LOCK_EX) == 0); error = pthread_barrier_wait(&barrier); ATF_REQUIRE(error == 0 || error == PTHREAD_BARRIER_SERIAL_THREAD); /* * Give the child some time to block. Is there a better way to do this? */ sleep(1); /* * Attach and give the child 3 seconds to stop. */ ATF_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) == 0); for (i = 0; i < 3; i++) { wpid = waitpid(child, &status, WNOHANG); if (wpid == child && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) break; sleep(1); } ATF_REQUIRE_MSG(i < 3, "failed to stop child process after PT_ATTACH"); ATF_REQUIRE(ptrace(PT_DETACH, child, NULL, 0) == 0); ATF_REQUIRE(flock(fd, LOCK_UN) == 0); ATF_REQUIRE(unlink(tmpfile) == 0); ATF_REQUIRE(close(fd) == 0); } static void sigusr1_step_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); raise(SIGABRT); } /* * Verify that PT_STEP with a signal invokes the signal before * stepping the next instruction (and that the next instruction is * stepped correctly). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_STEP_with_signal); ATF_TC_BODY(ptrace__PT_STEP_with_signal, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal(SIGUSR1, sigusr1_step_handler); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Step the child process inserting SIGUSR1. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next stop should report the SIGABRT in the signal handler. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue the child process discarding the signal. 
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report a trace trap from PT_STEP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that a SIGTRAP event with the TRAP_BRKPT code is reported * for a breakpoint trap. */ ATF_TC_WITHOUT_HEAD(ptrace__breakpoint_siginfo); ATF_TC_BODY(ptrace__breakpoint_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_BRKPT); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that a SIGTRAP event with the TRAP_TRACE code is reported * for a single-step trap from PT_STEP. */ ATF_TC_WITHOUT_HEAD(ptrace__step_siginfo); ATF_TC_BODY(ptrace__step_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Step the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a single-step trap. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child process. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. 
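*/

/*
 * As in the other tests, the trailing wait() must fail with ECHILD,
 * proving that the traced child was the only child and has been fully
 * reaped.
 */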
wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) static void * continue_thread(void *arg __unused) { breakpoint(); return (NULL); } static __dead2 void continue_thread_main(void) { pthread_t threads[2]; CHILD_REQUIRE(pthread_create(&threads[0], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_create(&threads[1], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(threads[0], NULL) == 0); CHILD_REQUIRE(pthread_join(threads[1], NULL) == 0); exit(1); } /* * Ensure that PT_CONTINUE clears the status of the thread that * triggered the stop even if a different thread's LWP was passed to * PT_CONTINUE. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_different_thread); ATF_TC_BODY(ptrace__PT_CONTINUE_different_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; bool hit_break[2]; struct reg reg; int i, j, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); continue_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One of the new threads should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); lwps[0] = pl.pl_lwpid; /* * Suspend this thread to ensure both threads are alive before * hitting the breakpoint. */ ATF_REQUIRE(ptrace(PT_SUSPEND, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second thread should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; /* Resume both threads waiting for breakpoint events. */ hit_break[0] = hit_break[1] = false; ATF_REQUIRE(ptrace(PT_RESUME, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One thread should report a breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); /* * Resume both threads but pass the other thread's LWPID to * PT_CONTINUE. */
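/*
 * i indexes the thread that hit the breakpoint, so i ^ 1 (0 <-> 1)
 * selects the other thread. Passing that LWP id to PT_CONTINUE
 * exercises exactly the case under test: the kernel must still clear
 * the stopped thread's event.
 */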
ATF_REQUIRE(ptrace(PT_CONTINUE, lwps[i ^ 1], (caddr_t)1, 0) == 0); /* * We will now get two thread exit events and one more breakpoint * event. */ for (j = 0; j < 3; j++) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; ATF_REQUIRE_MSG(lwps[i] != 0, "event for exited thread"); if (pl.pl_flags & PL_FLAG_EXITED) { ATF_REQUIRE_MSG(hit_break[i], "exited thread did not report breakpoint"); lwps[i] = 0; } else { ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); ATF_REQUIRE_MSG(!hit_break[i], "double breakpoint event"); hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); } ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Both threads should have exited. */ ATF_REQUIRE(lwps[0] == 0); ATF_REQUIRE(lwps[1] == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* * Verify that PT_LWPINFO doesn't return stale siginfo. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_LWPINFO_stale_siginfo); ATF_TC_BODY(ptrace__PT_LWPINFO_stale_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* * Continue the process ignoring the signal, but enabling * syscall traps. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * The next stop should report a system call entry from * exit(). PL_FLAG_SI should not be set. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) == 0); /* Disable syscall tracing and continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events &= ~PTRACE_SYSCALL; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit.
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_trace_me); ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_attach); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_child_debugger); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__getppid); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_fork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_vfork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_thread); ATF_TP_ADD_TC(tp, ptrace__lwp_events); ATF_TP_ADD_TC(tp, ptrace__lwp_events_exec); ATF_TP_ADD_TC(tp, ptrace__siginfo); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_disable); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_enable); ATF_TP_ADD_TC(tp, ptrace__event_mask); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork_follow); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__PT_KILL_breakpoint); #endif ATF_TP_ADD_TC(tp, ptrace__PT_KILL_system_call); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_threads); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_signal); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_stop); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_change_sig); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_mix); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TP_ADD_TC(tp, ptrace__killed_with_sigmask); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigmask); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop1); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop2); ATF_TP_ADD_TC(tp, ptrace__event_mask_sigkill_discard); ATF_TP_ADD_TC(tp, ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TP_ADD_TC(tp, ptrace__PT_STEP_with_signal); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__breakpoint_siginfo); #endif ATF_TP_ADD_TC(tp, ptrace__step_siginfo); #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_different_thread); #endif ATF_TP_ADD_TC(tp, ptrace__PT_LWPINFO_stale_siginfo); return (atf_no_error()); } Index: head/tools/tools/decioctl/decioctl.c =================================================================== --- head/tools/tools/decioctl/decioctl.c (revision 344854) +++ head/tools/tools/decioctl/decioctl.c (revision 344855) @@ -1,94 +1,93 @@ /*- * Copyright (c) 2005-2006,2016 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include static void usage(char **av) { fprintf(stderr, "%s: [ ... ]\n", av[0]); exit(1); } int main(int ac, char **av) { unsigned long cmd; const char *name; char *cp; int group, i; if (ac < 2) usage(av); printf(" command : dir group num len name\n"); for (i = 1; i < ac; i++) { errno = 0; cmd = strtoul(av[i], &cp, 0); if (*cp != '\0' || errno != 0) { fprintf(stderr, "Invalid integer: %s\n", av[i]); usage(av); } printf("0x%08lx: ", cmd); switch (cmd & IOC_DIRMASK) { case IOC_VOID: printf("VOID "); break; case IOC_OUT: printf("OUT "); break; case IOC_IN: printf("IN "); break; case IOC_INOUT: printf("INOUT"); break; default: printf("%01lx ???", (cmd & IOC_DIRMASK) >> 29); break; } printf(" "); group = IOCGROUP(cmd); if (isprint(group)) printf(" '%c' ", group); else printf(" 0x%02x", group); printf(" %3lu %4lu", cmd & 0xff, IOCPARM_LEN(cmd)); name = sysdecode_ioctlname(cmd); if (name != NULL) printf(" %s", name); printf("\n"); } return (0); } Index: head/usr.sbin/bhyve/gdb.c =================================================================== --- head/usr.sbin/bhyve/gdb.c (revision 344854) +++ head/usr.sbin/bhyve/gdb.c (revision 344855) @@ -1,1313 +1,1312 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017-2018 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "mem.h" #include "mevent.h" /* * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops * use SIGTRAP. */ #define GDB_SIGNAL_TRAP 5 static void gdb_resume_vcpus(void); static void check_command(int fd); static struct mevent *read_event, *write_event; static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting; static pthread_mutex_t gdb_lock; static pthread_cond_t idle_vcpus; static bool stop_pending, first_stop; static int stepping_vcpu, stopped_vcpu; /* * An I/O buffer contains 'capacity' bytes of room at 'data'. For a * read buffer, 'start' is unused and 'len' contains the number of * valid bytes in the buffer. For a write buffer, 'start' is set to * the index of the next byte in 'data' to send, and 'len' contains * the remaining number of valid bytes to send. */ struct io_buffer { uint8_t *data; size_t capacity; size_t start; size_t len; }; static struct io_buffer cur_comm, cur_resp; static uint8_t cur_csum; static int cur_vcpu; static struct vmctx *ctx; static int cur_fd = -1; const int gdb_regset[] = { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_RBP, VM_REG_GUEST_RSP, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15, VM_REG_GUEST_RIP, VM_REG_GUEST_RFLAGS, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_ES, VM_REG_GUEST_FS, VM_REG_GUEST_GS }; const int gdb_regsize[] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4 }; #ifdef GDB_LOG #include #include static void __printflike(1, 2) debug(const char *fmt, ...) { static FILE *logfile; va_list ap; if (logfile == NULL) { logfile = fopen("/tmp/bhyve_gdb.log", "w"); if (logfile == NULL) return; #ifndef WITHOUT_CAPSICUM if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) { fclose(logfile); logfile = NULL; return; } #endif setlinebuf(logfile); } va_start(ap, fmt); vfprintf(logfile, fmt, ap); va_end(ap); } #else #define debug(...) #endif static int guest_paging_info(int vcpu, struct vm_guest_paging *paging) { uint64_t regs[4]; const int regset[4] = { VM_REG_GUEST_CR0, VM_REG_GUEST_CR3, VM_REG_GUEST_CR4, VM_REG_GUEST_EFER }; if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1) return (-1); /* * For the debugger, always pretend to be the kernel (CPL 0), * and if long-mode is enabled, always parse addresses as if * in 64-bit mode. 
*/ paging->cr3 = regs[1]; paging->cpl = 0; if (regs[3] & EFER_LMA) paging->cpu_mode = CPU_MODE_64BIT; else if (regs[0] & CR0_PE) paging->cpu_mode = CPU_MODE_PROTECTED; else paging->cpu_mode = CPU_MODE_REAL; if (!(regs[0] & CR0_PG)) paging->paging_mode = PAGING_MODE_FLAT; else if (!(regs[2] & CR4_PAE)) paging->paging_mode = PAGING_MODE_32; else if (regs[3] & EFER_LME) paging->paging_mode = PAGING_MODE_64; else paging->paging_mode = PAGING_MODE_PAE; return (0); } /* * Map a guest virtual address to a physical address (for a given vcpu). * If a guest virtual address is valid, return 1. If the address is * not valid, return 0. If an error occurs obtaining the mapping, * return -1. */ static int guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr) { struct vm_guest_paging paging; int fault; if (guest_paging_info(vcpu, &paging) == -1) return (-1); /* * Always use PROT_READ. We really care if the VA is * accessible, not if the current vCPU can write. */ if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr, &fault) == -1) return (-1); if (fault) return (0); return (1); } static void io_buffer_reset(struct io_buffer *io) { io->start = 0; io->len = 0; } /* Available room for adding data. */ static size_t io_buffer_avail(struct io_buffer *io) { return (io->capacity - (io->start + io->len)); } static uint8_t * io_buffer_head(struct io_buffer *io) { return (io->data + io->start); } static uint8_t * io_buffer_tail(struct io_buffer *io) { return (io->data + io->start + io->len); } static void io_buffer_advance(struct io_buffer *io, size_t amount) { assert(amount <= io->len); io->start += amount; io->len -= amount; } static void io_buffer_consume(struct io_buffer *io, size_t amount) { io_buffer_advance(io, amount); if (io->len == 0) { io->start = 0; return; } /* * XXX: Consider making this move optional and compacting on a * future read() before realloc(). */ memmove(io->data, io_buffer_head(io), io->len); io->start = 0; } static void io_buffer_grow(struct io_buffer *io, size_t newsize) { uint8_t *new_data; size_t avail, new_cap; avail = io_buffer_avail(io); if (newsize <= avail) return; new_cap = io->capacity + (newsize - avail); new_data = realloc(io->data, new_cap); if (new_data == NULL) err(1, "Failed to grow GDB I/O buffer"); io->data = new_data; io->capacity = new_cap; } static bool response_pending(void) { if (cur_resp.start == 0 && cur_resp.len == 0) return (false); if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+') return (false); return (true); } static void close_connection(void) { /* * XXX: This triggers a warning because mevent does the close * before the EV_DELETE. */ pthread_mutex_lock(&gdb_lock); mevent_delete(write_event); mevent_delete_close(read_event); write_event = NULL; read_event = NULL; io_buffer_reset(&cur_comm); io_buffer_reset(&cur_resp); cur_fd = -1; /* Resume any stopped vCPUs. */ gdb_resume_vcpus(); pthread_mutex_unlock(&gdb_lock); } static uint8_t hex_digit(uint8_t nibble) { if (nibble <= 9) return (nibble + '0'); else return (nibble + 'a' - 10); } static uint8_t parse_digit(uint8_t v) { if (v >= '0' && v <= '9') return (v - '0'); if (v >= 'a' && v <= 'f') return (v - 'a' + 10); if (v >= 'A' && v <= 'F') return (v - 'A' + 10); return (0xF); } /* Parses big-endian hexadecimal. 
*/ static uintmax_t parse_integer(const uint8_t *p, size_t len) { uintmax_t v; v = 0; while (len > 0) { v <<= 4; v |= parse_digit(*p); p++; len--; } return (v); } static uint8_t parse_byte(const uint8_t *p) { return (parse_digit(p[0]) << 4 | parse_digit(p[1])); } static void send_pending_data(int fd) { ssize_t nwritten; if (cur_resp.len == 0) { mevent_disable(write_event); return; } nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len); if (nwritten == -1) { warn("Write to GDB socket failed"); close_connection(); } else { io_buffer_advance(&cur_resp, nwritten); if (cur_resp.len == 0) mevent_disable(write_event); else mevent_enable(write_event); } } /* Append a single character to the output buffer. */ static void send_char(uint8_t data) { io_buffer_grow(&cur_resp, 1); *io_buffer_tail(&cur_resp) = data; cur_resp.len++; } /* Append an array of bytes to the output buffer. */ static void send_data(const uint8_t *data, size_t len) { io_buffer_grow(&cur_resp, len); memcpy(io_buffer_tail(&cur_resp), data, len); cur_resp.len += len; } static void format_byte(uint8_t v, uint8_t *buf) { buf[0] = hex_digit(v >> 4); buf[1] = hex_digit(v & 0xf); } /* * Append a single byte (formatted as two hex characters) to the * output buffer. */ static void send_byte(uint8_t v) { uint8_t buf[2]; format_byte(v, buf); send_data(buf, sizeof(buf)); } static void start_packet(void) { send_char('$'); cur_csum = 0; } static void finish_packet(void) { send_char('#'); send_byte(cur_csum); debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp)); } /* * Append a single character (for the packet payload) and update the * checksum. */ static void append_char(uint8_t v) { send_char(v); cur_csum += v; } /* * Append an array of bytes (for the packet payload) and update the * checksum. 
*/ static void append_packet_data(const uint8_t *data, size_t len) { send_data(data, len); while (len > 0) { cur_csum += *data; data++; len--; } } static void append_string(const char *str) { append_packet_data(str, strlen(str)); } static void append_byte(uint8_t v) { uint8_t buf[2]; format_byte(v, buf); append_packet_data(buf, sizeof(buf)); } static void append_unsigned_native(uintmax_t value, size_t len) { size_t i; for (i = 0; i < len; i++) { append_byte(value); value >>= 8; } } static void append_unsigned_be(uintmax_t value, size_t len) { char buf[len * 2]; size_t i; for (i = 0; i < len; i++) { format_byte(value, buf + (len - i - 1) * 2); value >>= 8; } append_packet_data(buf, sizeof(buf)); } static void append_integer(unsigned int value) { if (value == 0) append_char('0'); else append_unsigned_be(value, (fls(value) + 7) / 8); } static void append_asciihex(const char *str) { while (*str != '\0') { append_byte(*str); str++; } } static void send_empty_response(void) { start_packet(); finish_packet(); } static void send_error(int error) { start_packet(); append_char('E'); append_byte(error); finish_packet(); } static void send_ok(void) { start_packet(); append_string("OK"); finish_packet(); } static int parse_threadid(const uint8_t *data, size_t len) { if (len == 1 && *data == '0') return (0); if (len == 2 && memcmp(data, "-1", 2) == 0) return (-1); if (len == 0) return (-2); return (parse_integer(data, len)); } static void report_stop(void) { start_packet(); if (stopped_vcpu == -1) append_char('S'); else append_char('T'); append_byte(GDB_SIGNAL_TRAP); if (stopped_vcpu != -1) { append_string("thread:"); append_integer(stopped_vcpu + 1); append_char(';'); } stopped_vcpu = -1; finish_packet(); } static void gdb_finish_suspend_vcpus(void) { if (first_stop) { first_stop = false; stopped_vcpu = -1; } else if (response_pending()) stop_pending = true; else { report_stop(); send_pending_data(cur_fd); } } static void _gdb_cpu_suspend(int vcpu, bool report_stop) { debug("$vCPU %d suspending\n", vcpu); CPU_SET(vcpu, &vcpus_waiting); if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) gdb_finish_suspend_vcpus(); while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu) pthread_cond_wait(&idle_vcpus, &gdb_lock); CPU_CLR(vcpu, &vcpus_waiting); debug("$vCPU %d resuming\n", vcpu); } void gdb_cpu_add(int vcpu) { debug("$vCPU %d starting\n", vcpu); pthread_mutex_lock(&gdb_lock); CPU_SET(vcpu, &vcpus_active); /* * If a vcpu is added while vcpus are stopped, suspend the new * vcpu so that it will pop back out with a debug exit before * executing the first instruction.
*/ if (!CPU_EMPTY(&vcpus_suspended)) { CPU_SET(vcpu, &vcpus_suspended); _gdb_cpu_suspend(vcpu, false); } pthread_mutex_unlock(&gdb_lock); } void gdb_cpu_suspend(int vcpu) { pthread_mutex_lock(&gdb_lock); _gdb_cpu_suspend(vcpu, true); pthread_mutex_unlock(&gdb_lock); } void gdb_cpu_mtrap(int vcpu) { debug("$vCPU %d MTRAP\n", vcpu); pthread_mutex_lock(&gdb_lock); if (vcpu == stepping_vcpu) { stepping_vcpu = -1; vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); vm_suspend_cpu(ctx, vcpu); assert(stopped_vcpu == -1); stopped_vcpu = vcpu; _gdb_cpu_suspend(vcpu, true); } pthread_mutex_unlock(&gdb_lock); } static void gdb_suspend_vcpus(void) { assert(pthread_mutex_isowned_np(&gdb_lock)); debug("suspending all CPUs\n"); vcpus_suspended = vcpus_active; vm_suspend_cpu(ctx, -1); if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) gdb_finish_suspend_vcpus(); } static bool gdb_step_vcpu(int vcpu) { int error, val; debug("$vCPU %d step\n", vcpu); error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); if (error < 0) return (false); error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); vm_resume_cpu(ctx, vcpu); stepping_vcpu = vcpu; pthread_cond_broadcast(&idle_vcpus); return (true); } static void gdb_resume_vcpus(void) { assert(pthread_mutex_isowned_np(&gdb_lock)); vm_resume_cpu(ctx, -1); debug("resuming all CPUs\n"); CPU_ZERO(&vcpus_suspended); pthread_cond_broadcast(&idle_vcpus); } static void gdb_read_regs(void) { uint64_t regvals[nitems(gdb_regset)]; int i; if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset), gdb_regset, regvals) == -1) { send_error(errno); return; } start_packet(); for (i = 0; i < nitems(regvals); i++) append_unsigned_native(regvals[i], gdb_regsize[i]); finish_packet(); } static void gdb_read_mem(const uint8_t *data, size_t len) { uint64_t gpa, gva, val; uint8_t *cp; size_t resid, todo, bytes; bool started; int error; cp = memchr(data, ',', len); if (cp == NULL) { send_error(EINVAL); return; } gva = parse_integer(data + 1, cp - (data + 1)); resid = parse_integer(cp + 1, len - (cp + 1 - data)); started = false; while (resid > 0) { error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); if (error == -1) { if (started) finish_packet(); else send_error(errno); return; } if (error == 0) { if (started) finish_packet(); else send_error(EFAULT); return; } /* Read bytes from current page. */ todo = getpagesize() - gpa % getpagesize(); if (todo > resid) todo = resid; cp = paddr_guest2host(ctx, gpa, todo); if (cp != NULL) { /* * If this page is guest RAM, read it a byte * at a time. */ if (!started) { start_packet(); started = true; } while (todo > 0) { append_byte(*cp); cp++; gpa++; gva++; resid--; todo--; } } else { /* * If this page isn't guest RAM, try to handle * it via MMIO. For MMIO requests, use * aligned reads of words when possible. 
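*/

/*
 * Pick the widest access the alignment allows: single bytes when the
 * GPA is odd or only one byte remains, 2-byte reads when the GPA is
 * not 4-byte aligned or exactly two bytes remain, and 4-byte words
 * otherwise.
 */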
while (todo > 0) { if (gpa & 1 || todo == 1) bytes = 1; else if (gpa & 2 || todo == 2) bytes = 2; else bytes = 4; error = read_mem(ctx, cur_vcpu, gpa, &val, bytes); if (error == 0) { if (!started) { start_packet(); started = true; } gpa += bytes; gva += bytes; resid -= bytes; todo -= bytes; while (bytes > 0) { append_byte(val); val >>= 8; bytes--; } } else { if (started) finish_packet(); else send_error(EFAULT); return; } } } assert(resid == 0 || gpa % getpagesize() == 0); } if (!started) start_packet(); finish_packet(); } static bool command_equals(const uint8_t *data, size_t len, const char *cmd) { if (strlen(cmd) > len) return (false); return (memcmp(data, cmd, strlen(cmd)) == 0); } static void gdb_query(const uint8_t *data, size_t len) { /* * TODO: * - qSearch * - qSupported */ if (command_equals(data, len, "qAttached")) { start_packet(); append_char('1'); finish_packet(); } else if (command_equals(data, len, "qC")) { start_packet(); append_string("QC"); append_integer(cur_vcpu + 1); finish_packet(); } else if (command_equals(data, len, "qfThreadInfo")) { cpuset_t mask; bool first; int vcpu; if (CPU_EMPTY(&vcpus_active)) { send_error(EINVAL); return; } mask = vcpus_active; start_packet(); append_char('m'); first = true; while (!CPU_EMPTY(&mask)) { vcpu = CPU_FFS(&mask) - 1; CPU_CLR(vcpu, &mask); if (first) first = false; else append_char(','); append_integer(vcpu + 1); } finish_packet(); } else if (command_equals(data, len, "qsThreadInfo")) { start_packet(); append_char('l'); finish_packet(); } else if (command_equals(data, len, "qThreadExtraInfo")) { char buf[16]; int tid; data += strlen("qThreadExtraInfo"); len -= strlen("qThreadExtraInfo"); if (*data != ',') { send_error(EINVAL); return; } tid = parse_threadid(data + 1, len - 1); if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { send_error(EINVAL); return; } snprintf(buf, sizeof(buf), "vCPU %d", tid - 1); start_packet(); append_asciihex(buf); finish_packet(); } else send_empty_response(); } static void handle_command(const uint8_t *data, size_t len) { /* Reject packets with a sequence-id. */ if (len >= 3 && data[0] >= '0' && data[0] <= '9' && data[1] >= '0' && data[1] <= '9' && data[2] == ':') { send_empty_response(); return; } switch (*data) { case 'c': if (len != 1) { send_error(EINVAL); break; } /* Don't send a reply until a stop occurs. */ gdb_resume_vcpus(); break; case 'D': send_ok(); /* TODO: Resume any stopped CPUs. */ break; case 'g': { gdb_read_regs(); break; } case 'H': { int tid; if (data[1] != 'g' && data[1] != 'c') { send_error(EINVAL); break; } tid = parse_threadid(data + 2, len - 2); if (tid == -2) { send_error(EINVAL); break; } if (CPU_EMPTY(&vcpus_active)) { send_error(EINVAL); break; } if (tid == -1 || tid == 0) cur_vcpu = CPU_FFS(&vcpus_active) - 1; else if (CPU_ISSET(tid - 1, &vcpus_active)) cur_vcpu = tid - 1; else { send_error(EINVAL); break; } send_ok(); break; } case 'm': gdb_read_mem(data, len); break; case 'T': { int tid; tid = parse_threadid(data + 1, len - 1); if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { send_error(EINVAL); return; } send_ok(); break; } case 'q': gdb_query(data, len); break; case 's': if (len != 1) { send_error(EINVAL); break; } /* Don't send a reply until a stop occurs. */ if (!gdb_step_vcpu(cur_vcpu)) { send_error(EOPNOTSUPP); break; } break; case '?': /* XXX: Only if stopped? */ /* For now, just report that we are always stopped.
*/ start_packet(); append_char('S'); append_byte(GDB_SIGNAL_TRAP); finish_packet(); break; case 'G': /* TODO */ case 'M': /* TODO */ case 'v': /* Handle 'vCont' */ /* 'vCtrlC' */ case 'p': /* TODO */ case 'P': /* TODO */ case 'Q': /* TODO */ case 't': /* TODO */ case 'X': /* TODO */ case 'z': /* TODO */ case 'Z': /* TODO */ default: send_empty_response(); } } /* Check for a valid packet in the command buffer. */ static void check_command(int fd) { uint8_t *head, *hash, *p, sum; size_t avail, plen; for (;;) { avail = cur_comm.len; if (avail == 0) return; head = io_buffer_head(&cur_comm); switch (*head) { case 0x03: debug("<- Ctrl-C\n"); io_buffer_consume(&cur_comm, 1); gdb_suspend_vcpus(); break; case '+': /* ACK of previous response. */ debug("<- +\n"); if (response_pending()) io_buffer_reset(&cur_resp); io_buffer_consume(&cur_comm, 1); if (stop_pending) { stop_pending = false; report_stop(); send_pending_data(fd); } break; case '-': /* NACK of previous response. */ debug("<- -\n"); if (response_pending()) { cur_resp.len += cur_resp.start; cur_resp.start = 0; if (cur_resp.data[0] == '+') io_buffer_advance(&cur_resp, 1); debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp)); } io_buffer_consume(&cur_comm, 1); send_pending_data(fd); break; case '$': /* Packet. */ if (response_pending()) { warnx("New GDB command while response in " "progress"); io_buffer_reset(&cur_resp); } /* Is packet complete? */ hash = memchr(head, '#', avail); if (hash == NULL) return; plen = (hash - head + 1) + 2; if (avail < plen) return; debug("<- %.*s\n", (int)plen, head); /* Verify checksum. */ for (sum = 0, p = head + 1; p < hash; p++) sum += *p; if (sum != parse_byte(hash + 1)) { io_buffer_consume(&cur_comm, plen); debug("-> -\n"); send_char('-'); send_pending_data(fd); break; } send_char('+'); handle_command(head + 1, hash - (head + 1)); io_buffer_consume(&cur_comm, plen); if (!response_pending()) debug("-> +\n"); send_pending_data(fd); break; default: /* XXX: Possibly drop connection instead. */ debug("-> %02x\n", *head); io_buffer_consume(&cur_comm, 1); break; } } } static void gdb_readable(int fd, enum ev_type event, void *arg) { ssize_t nread; int pending; if (ioctl(fd, FIONREAD, &pending) == -1) { warn("FIONREAD on GDB socket"); return; } /* * 'pending' might be zero due to EOF. We need to call read * with a non-zero length to detect EOF. */ if (pending == 0) pending = 1; /* Ensure there is room in the command buffer. */ io_buffer_grow(&cur_comm, pending); assert(io_buffer_avail(&cur_comm) >= pending); nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm)); if (nread == 0) { close_connection(); } else if (nread == -1) { if (errno == EAGAIN) return; warn("Read from GDB socket"); close_connection(); } else { cur_comm.len += nread; pthread_mutex_lock(&gdb_lock); check_command(fd); pthread_mutex_unlock(&gdb_lock); } } static void gdb_writable(int fd, enum ev_type event, void *arg) { send_pending_data(fd); } static void new_connection(int fd, enum ev_type event, void *arg) { int optval, s; s = accept4(fd, NULL, NULL, SOCK_NONBLOCK); if (s == -1) { if (arg != NULL) err(1, "Failed accepting initial GDB connection"); /* Silently ignore errors post-startup. 
*/ return; } optval = 1; if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) == -1) { warn("Failed to disable SIGPIPE for GDB connection"); close(s); return; } pthread_mutex_lock(&gdb_lock); if (cur_fd != -1) { close(s); warnx("Ignoring additional GDB connection."); } read_event = mevent_add(s, EVF_READ, gdb_readable, NULL); if (read_event == NULL) { if (arg != NULL) err(1, "Failed to setup initial GDB connection"); pthread_mutex_unlock(&gdb_lock); return; } write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL); if (write_event == NULL) { if (arg != NULL) err(1, "Failed to setup initial GDB connection"); mevent_delete_close(read_event); read_event = NULL; } cur_fd = s; cur_vcpu = 0; stepping_vcpu = -1; stopped_vcpu = -1; stop_pending = false; /* Break on attach. */ first_stop = true; gdb_suspend_vcpus(); pthread_mutex_unlock(&gdb_lock); } #ifndef WITHOUT_CAPSICUM void limit_gdb_socket(int s) { cap_rights_t rights; unsigned long ioctls[] = { FIONREAD }; cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE, CAP_SETSOCKOPT, CAP_IOCTL); if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); } #endif void init_gdb(struct vmctx *_ctx, int sport, bool wait) { struct sockaddr_in sin; int error, flags, s; debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not "); error = pthread_mutex_init(&gdb_lock, NULL); if (error != 0) errc(1, error, "gdb mutex init"); error = pthread_cond_init(&idle_vcpus, NULL); if (error != 0) errc(1, error, "gdb cv init"); ctx = _ctx; s = socket(PF_INET, SOCK_STREAM, 0); if (s < 0) err(1, "gdb socket create"); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_ANY); sin.sin_port = htons(sport); if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) err(1, "gdb socket bind"); if (listen(s, 1) < 0) err(1, "gdb socket listen"); if (wait) { /* * Set vcpu 0 in vcpus_suspended. This will trigger the * logic in gdb_cpu_add() to suspend the first vcpu before * it starts execution. The vcpu will remain suspended * until a debugger connects. */ stepping_vcpu = -1; stopped_vcpu = -1; CPU_SET(0, &vcpus_suspended); } flags = fcntl(s, F_GETFL); if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) err(1, "Failed to mark gdb socket non-blocking"); #ifndef WITHOUT_CAPSICUM limit_gdb_socket(s); #endif mevent_add(s, EVF_READ, new_connection, NULL); } Index: head/usr.sbin/bhyve/gdb.h =================================================================== --- head/usr.sbin/bhyve/gdb.h (revision 344854) +++ head/usr.sbin/bhyve/gdb.h (revision 344855) @@ -1,39 +1,38 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __GDB_H__ #define __GDB_H__ void gdb_cpu_add(int vcpu); void gdb_cpu_mtrap(int vcpu); void gdb_cpu_suspend(int vcpu); void init_gdb(struct vmctx *ctx, int sport, bool wait); #endif /* !__GDB_H__ */ Index: head/usr.sbin/devctl/devctl.8 =================================================================== --- head/usr.sbin/devctl/devctl.8 (revision 344854) +++ head/usr.sbin/devctl/devctl.8 (revision 344855) @@ -1,178 +1,177 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd August 29, 2016 .Dt DEVCTL 8 .Os .Sh NAME .Nm devctl .Nd device control utility .Sh SYNOPSIS .Nm .Cm attach .Ar device .Nm .Cm clear driver .Op Fl f .Ar device .Nm .Cm detach .Op Fl f .Ar device .Nm .Cm disable .Op Fl f .Ar device .Nm .Cm enable .Ar device .Nm .Cm suspend .Ar device .Nm .Cm resume .Ar device .Nm .Cm set driver .Op Fl f .Ar device driver .Nm .Cm rescan .Ar device .Nm .Cm delete .Op Fl f .Ar device .Sh DESCRIPTION The .Nm utility adjusts the state of individual devices in the kernel's internal device hierarchy. Each invocation of .Nm consists of a single command followed by command-specific arguments. Each command operates on a single device specified via the .Ar device argument. The .Ar device may be specified either as the name of an existing device or as a bus-specific address. More details on supported address formats can be found in .Xr devctl 3 . 
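.Pp
Each command below corresponds to a C function provided by
.Xr devctl 3 ,
so the same operations can be driven from a program.
As a minimal sketch, assuming the
.In devctl.h
header and linking with the devctl library, the following forces a
re-probe of a device:
.Bd -literal -offset indent
#include <devctl.h>
#include <err.h>

int
main(void)
{
	/* "ath0" is a placeholder; use any device name or address. */
	if (devctl_attach("ath0") < 0)
		err(1, "devctl_attach");
	return (0);
}
.Ed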
.Pp The following commands are supported: .Bl -tag -width indent .It Cm attach Ar device Force the kernel to re-probe the device. If a suitable driver is found, it is attached to the device. .It Xo Cm detach .Op Fl f .Ar device .Xc Detach the device from its current device driver. If the .Fl f flag is specified, the device driver will be detached even if the device is busy. .It Xo Cm disable .Op Fl f .Ar device .Xc Disable a device. If the device is currently attached to a device driver, the device driver will be detached from the device, but the device will retain its current name. If the .Fl f flag is specified, the device driver will be detached even if the device is busy. .It Cm enable Ar device Enable a device. The device will probe and attach if a suitable device driver is found. Note that this can re-enable a device disabled at boot time via a loader tunable. .It Cm suspend Ar device Suspend a device. This may include placing the device in a reduced power state. .It Cm resume Ar device Resume a suspended device to a fully working state. .It Xo Cm set driver .Op Fl f .Ar device driver .Xc Force the device to use a device driver named .Ar driver . If the device is already attached to a device driver and the .Fl f flag is specified, the device will be detached from its current device driver before it is attached to the new device driver. If the device is already attached to a device driver and the .Fl f flag is not specified, the device will not be changed. .It Xo Cm clear driver .Op Fl f .Ar device .Xc Clear a previously-forced driver name so that the device is able to use any valid device driver. After the previous name has been cleared, the device is reprobed so that other device drivers may attach to it. This can be used to undo an earlier .Cm set driver command. If the device is currently attached to a device driver and the .Fl f flag is not specified, the device will not be changed. .It Cm rescan Ar device Rescan a bus device checking for devices that have been added or removed. .It Xo Cm delete .Op Fl f .Ar device .Xc Delete the device from the device tree. If the .Fl f flag is specified, the device will be deleted even if it is physically present. This command should be used with care as a device that is deleted but present can no longer be used unless the parent bus device rediscovers the device via a rescan request. .El .Sh SEE ALSO .Xr devctl 3 , .Xr devinfo 8 .Sh HISTORY The .Nm utility first appeared in .Fx 10.3 . Index: head/usr.sbin/devctl/devctl.c =================================================================== --- head/usr.sbin/devctl/devctl.c (revision 344854) +++ head/usr.sbin/devctl/devctl.c (revision 344855) @@ -1,408 +1,407 @@ /*- * Copyright (c) 2014 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include struct devctl_command { const char *name; int (*handler)(int ac, char **av); }; #define DEVCTL_DATASET(name) devctl_ ## name ## _table #define DEVCTL_COMMAND(set, name, function) \ static struct devctl_command function ## _devctl_command = \ { #name, function }; \ DATA_SET(DEVCTL_DATASET(set), function ## _devctl_command) #define DEVCTL_TABLE(set, name) \ SET_DECLARE(DEVCTL_DATASET(name), struct devctl_command); \ \ static int \ devctl_ ## name ## _table_handler(int ac, char **av) \ { \ return (devctl_table_handler(SET_BEGIN(DEVCTL_DATASET(name)), \ SET_LIMIT(DEVCTL_DATASET(name)), ac, av)); \ } \ DEVCTL_COMMAND(set, name, devctl_ ## name ## _table_handler) static int devctl_table_handler(struct devctl_command **start, struct devctl_command **end, int ac, char **av); SET_DECLARE(DEVCTL_DATASET(top), struct devctl_command); DEVCTL_TABLE(top, clear); DEVCTL_TABLE(top, set); static void usage(void) { fprintf(stderr, "usage: devctl attach device\n" " devctl detach [-f] device\n" " devctl disable [-f] device\n" " devctl enable device\n" " devctl suspend device\n" " devctl resume device\n" " devctl set driver [-f] device driver\n" " devctl clear driver [-f] device\n" " devctl rescan device\n" " devctl delete [-f] device\n" " devctl freeze\n" " devctl thaw\n"); exit(1); } static int devctl_table_handler(struct devctl_command **start, struct devctl_command **end, int ac, char **av) { struct devctl_command **cmd; if (ac < 2) { warnx("The %s command requires a sub-command.", av[0]); return (EINVAL); } for (cmd = start; cmd < end; cmd++) { if (strcmp((*cmd)->name, av[1]) == 0) return ((*cmd)->handler(ac - 1, av + 1)); } warnx("%s is not a valid sub-command of %s.", av[1], av[0]); return (ENOENT); } static int help(int ac __unused, char **av __unused) { usage(); return (0); } DEVCTL_COMMAND(top, help, help); static int attach(int ac, char **av) { if (ac != 2) usage(); if (devctl_attach(av[1]) < 0) err(1, "Failed to attach %s", av[1]); return (0); } DEVCTL_COMMAND(top, attach, attach); static void detach_usage(void) { fprintf(stderr, "usage: devctl detach [-f] device\n"); exit(1); } static int detach(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: detach_usage(); } ac -= optind; av += optind; if (ac != 1) detach_usage(); if (devctl_detach(av[0], force) < 0) err(1, "Failed to detach %s", av[0]); return (0); } DEVCTL_COMMAND(top, detach, detach); static void disable_usage(void) { fprintf(stderr, "usage: devctl disable [-f] device\n"); exit(1); } static int disable(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: disable_usage(); } ac -= optind; av += optind; if (ac != 1) disable_usage(); if (devctl_disable(av[0], force) < 0) err(1, "Failed to disable %s", av[0]); return (0); } 
DEVCTL_COMMAND(top, disable, disable); static int enable(int ac, char **av) { if (ac != 2) usage(); if (devctl_enable(av[1]) < 0) err(1, "Failed to enable %s", av[1]); return (0); } DEVCTL_COMMAND(top, enable, enable); static int suspend(int ac, char **av) { if (ac != 2) usage(); if (devctl_suspend(av[1]) < 0) err(1, "Failed to suspend %s", av[1]); return (0); } DEVCTL_COMMAND(top, suspend, suspend); static int resume(int ac, char **av) { if (ac != 2) usage(); if (devctl_resume(av[1]) < 0) err(1, "Failed to resume %s", av[1]); return (0); } DEVCTL_COMMAND(top, resume, resume); static void set_driver_usage(void) { fprintf(stderr, "usage: devctl set driver [-f] device driver\n"); exit(1); } static int set_driver(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: set_driver_usage(); } ac -= optind; av += optind; if (ac != 2) set_driver_usage(); if (devctl_set_driver(av[0], av[1], force) < 0) err(1, "Failed to set %s driver to %s", av[0], av[1]); return (0); } DEVCTL_COMMAND(set, driver, set_driver); static void clear_driver_usage(void) { fprintf(stderr, "usage: devctl clear driver [-f] device\n"); exit(1); } static int clear_driver(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: clear_driver_usage(); } ac -= optind; av += optind; if (ac != 1) clear_driver_usage(); if (devctl_clear_driver(av[0], force) < 0) err(1, "Failed to clear %s driver", av[0]); return (0); } DEVCTL_COMMAND(clear, driver, clear_driver); static int rescan(int ac, char **av) { if (ac != 2) usage(); if (devctl_rescan(av[1]) < 0) err(1, "Failed to rescan %s", av[1]); return (0); } DEVCTL_COMMAND(top, rescan, rescan); static void delete_usage(void) { fprintf(stderr, "usage: devctl delete [-f] device\n"); exit(1); } static int delete(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: delete_usage(); } ac -= optind; av += optind; if (ac != 1) delete_usage(); if (devctl_delete(av[0], force) < 0) err(1, "Failed to delete %s", av[0]); return (0); } DEVCTL_COMMAND(top, delete, delete); static void freeze_usage(void) { fprintf(stderr, "usage: devctl freeze\n"); exit(1); } static int freeze(int ac, char **av __unused) { if (ac != 1) freeze_usage(); if (devctl_freeze() < 0) err(1, "Failed to freeze probe/attach"); return (0); } DEVCTL_COMMAND(top, freeze, freeze); static void thaw_usage(void) { fprintf(stderr, "usage: devctl thaw\n"); exit(1); } static int thaw(int ac, char **av __unused) { if (ac != 1) thaw_usage(); if (devctl_thaw() < 0) err(1, "Failed to thaw probe/attach"); return (0); } DEVCTL_COMMAND(top, thaw, thaw); int main(int ac, char *av[]) { struct devctl_command **cmd; if (ac == 1) usage(); ac--; av++; SET_FOREACH(cmd, DEVCTL_DATASET(top)) { if (strcmp((*cmd)->name, av[0]) == 0) { if ((*cmd)->handler(ac, av) != 0) return (1); else return (0); } } warnx("Unknown command %s.", av[0]); return (1); }
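A note on the dispatch mechanism used by devctl.c above: DEVCTL_COMMAND() and DEVCTL_TABLE() are built on FreeBSD linker sets. Each registration macro places a pointer to a struct devctl_command into a named ELF section at link time, and main() (or the generated table handler for two-word commands such as "set driver" and "clear driver") walks that section with SET_FOREACH(). The sketch below shows the pattern in isolation; the demo_table set and the hello command are hypothetical names, and <sys/linker_set.h> makes it FreeBSD-specific:

#include <sys/linker_set.h>

#include <stdio.h>
#include <string.h>

struct demo_command {
	const char *name;
	int (*handler)(void);
};

/* Declare the set; the linker gathers all DATA_SET() entries into it. */
SET_DECLARE(demo_table, struct demo_command);

static int
hello(void)
{
	printf("hello\n");
	return (0);
}

/* Register "hello" at link time, as DEVCTL_COMMAND() does. */
static struct demo_command hello_command = { "hello", hello };
DATA_SET(demo_table, hello_command);

int
main(int argc, char **argv)
{
	struct demo_command **cmd;

	if (argc < 2)
		return (1);
	SET_FOREACH(cmd, demo_table) {
		if (strcmp((*cmd)->name, argv[1]) == 0)
			return ((*cmd)->handler());
	}
	fprintf(stderr, "unknown command %s\n", argv[1]);
	return (1);
}

Because the table is assembled by the linker, adding a command is purely local: define a handler and register it with DATA_SET(); no central array needs to be edited.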