Index: head/stand/i386/libi386/bootinfo64.c
===================================================================
--- head/stand/i386/libi386/bootinfo64.c	(revision 356939)
+++ head/stand/i386/libi386/bootinfo64.c	(revision 356940)
@@ -1,270 +1,271 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
 #include <machine/bootinfo.h>
 #include <machine/cpufunc.h>
 #include <machine/metadata.h>
 #include <machine/psl.h>
 #include <machine/specialreg.h>
 #include "bootstrap.h"
 #include "libi386.h"
 #include "btxv86.h"
 
 #ifdef LOADER_GELI_SUPPORT
 #include "geliboot.h"
 #endif
 
 /*
  * Copy module-related data into the load area, where it can be
  * used as a directory for loaded modules.
  *
  * Module data is presented in a self-describing format.  Each datum
  * is preceded by a 32-bit identifier and a 32-bit size field.
  * In every macro 'a' is the destination address, which is always advanced (payloads by their size rounded up to
  * sizeof(uint64_t)); 'c' is the copy flag: with c == 0 nothing is written, so the pass only measures.  Data saved:
  *
  * MOD_NAME	(variable)		module name (string)
  * MOD_TYPE	(variable)		module type (string)
  * MOD_ARGS	(variable)		module parameters (string)
  * MOD_ADDR	sizeof(s)		module load address (size of the variable passed as 's')
  * MOD_SIZE	sizeof(s)		module size (size of the variable passed as 's')
  * MOD_METADATA	(variable)		type-specific metadata
  */
 #define COPY32(v, a, c) {			\
     uint32_t	x = (v);			\
     if (c)					\
 	i386_copyin(&x, a, sizeof(x));		\
     a += sizeof(x);				\
 }
 
 #define MOD_STR(t, a, s, c) {			\
     COPY32(t, a, c);				\
     COPY32(strlen(s) + 1, a, c);		\
     if (c)					\
 	i386_copyin(s, a, strlen(s) + 1);	\
     a += roundup(strlen(s) + 1, sizeof(uint64_t));\
 }
 
 #define MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
 #define MOD_TYPE(a, s, c)	MOD_STR(MODINFO_TYPE, a, s, c)
 #define MOD_ARGS(a, s, c)	MOD_STR(MODINFO_ARGS, a, s, c)
 
 #define MOD_VAR(t, a, s, c) {			\
     COPY32(t, a, c);				\
     COPY32(sizeof(s), a, c);			\
     if (c)					\
 	i386_copyin(&s, a, sizeof(s));		\
     a += roundup(sizeof(s), sizeof(uint64_t));	\
 }
 
 #define MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
 #define MOD_SIZE(a, s, c)	MOD_VAR(MODINFO_SIZE, a, s, c)
 
 #define MOD_METADATA(a, mm, c) {		\
     COPY32(MODINFO_METADATA | mm->md_type, a, c); \
     COPY32(mm->md_size, a, c);			\
     if (c)					\
 	i386_copyin(mm->md_data, a, mm->md_size); \
     a += roundup(mm->md_size, sizeof(uint64_t));\
 }
 
 #define MOD_END(a, c) {				\
     COPY32(MODINFO_END, a, c);			\
     COPY32(0, a, c);				\
 }
 
 static vm_offset_t
 bi_copymodules64(vm_offset_t addr)	/* returns the advanced address (the total size when addr == 0) */
 {
     struct preloaded_file	*fp;
     struct file_metadata	*md;
     int				c;	/* copy flag handed to the MOD_* macros */
     uint64_t			v;	/* 64-bit staging copy for the address and size fields */
 
     c = addr != 0;	/* addr == 0: nothing is written, the records are only measured */
     /* start with the first module on the list, should be the kernel */
     for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
 
 	MOD_NAME(addr, fp->f_name, c);	/* this field must come first */
 	MOD_TYPE(addr, fp->f_type, c);
 	if (fp->f_args)			/* an arguments record is emitted only when arguments exist */
 	    MOD_ARGS(addr, fp->f_args, c);
 	v = fp->f_addr;
 	MOD_ADDR(addr, v, c);
 	v = fp->f_size;
 	MOD_SIZE(addr, v, c);
 	for (md = fp->f_metadata; md != NULL; md = md->md_next)
 	    if (!(md->md_type & MODINFOMD_NOCOPY))	/* entries flagged MODINFOMD_NOCOPY are not emitted */
 		MOD_METADATA(addr, md, c);
     }
     MOD_END(addr, c);	/* MODINFO_END record with a zero size closes the list */
     return(addr);
 }
 
 /*
  * Check to see if this CPU supports long mode.  Returns 0 if "cpuid" is missing, the vendor is not accepted or leaf 0x80000001 is absent; otherwise the AMDID_LM bit.
  */
 static int
 bi_checkcpu(void)
 {
     char *cpu_vendor;
     int vendor[3];	/* vendor-string registers; read as 12 characters through cpu_vendor */
     int eflags;
     unsigned int regs[4];
 
     /* Check for presence of "cpuid": give up unless the PSL_ID flag can be toggled. */
     eflags = read_eflags();
     write_eflags(eflags ^ PSL_ID);
     if (!((eflags ^ read_eflags()) & PSL_ID))
 	return (0);
 
     /* Fetch the vendor string (stored in the order regs[1], regs[3], regs[2]). */
     do_cpuid(0, regs);
     vendor[0] = regs[1];
     vendor[1] = regs[3];
     vendor[2] = regs[2];
     cpu_vendor = (char *)vendor;
 
     /* Check for vendors that support AMD features; any other vendor is rejected. */
     if (strncmp(cpu_vendor, INTEL_VENDOR_ID, 12) != 0 &&
 	strncmp(cpu_vendor, AMD_VENDOR_ID, 12) != 0 &&
+	strncmp(cpu_vendor, HYGON_VENDOR_ID, 12) != 0 &&
 	strncmp(cpu_vendor, CENTAUR_VENDOR_ID, 12) != 0)
 	return (0);
 
     /* Has to support AMD features: the highest extended leaf must reach 0x80000001. */
     do_cpuid(0x80000000, regs);
     if (!(regs[0] >= 0x80000001))
 	return (0);
 
     /* Check for long mode; the result is the masked bit, not a normalised 0/1. */
     do_cpuid(0x80000001, regs);
     return (regs[3] & AMDID_LM);
 }
 
 /*
  * Load the information expected by an amd64 kernel.  Returns 0 on success, or EINVAL if the CPU lacks long mode or no root device can be determined.
  *
  * - The 'boothowto' flags are built from 'args' and attached to the kernel file as metadata.
  * - The root device is resolved (honouring $rootdev) and used to consult /etc/fstab.
  * - If 'addr' is 0 the data goes after the highest loaded module; '*modulep' receives the page-aligned metadata address.
  * - The kernel environment is copied into kernel space.
  * - Module metadata are formatted and placed in kernel space; '*kernendp' receives the page-rounded end address.
  */
 int
 bi_load64(char *args, vm_offset_t addr, vm_offset_t *modulep,
     vm_offset_t *kernendp, int add_smap)
 {
     struct preloaded_file	*xp, *kfp;
     struct i386_devdesc		*rootdev;
     struct file_metadata	*md;
     uint64_t			kernend;
     uint64_t			envp;
     uint64_t			module;
     vm_offset_t			size;
     char			*rootdevname;
     int				howto;
 
     if (!bi_checkcpu()) {
 	printf("CPU doesn't support long mode\n");
 	return (EINVAL);
     }
 
     howto = bi_getboothowto(args);
 
     /*
      * Allow the environment variable 'rootdev' to override the supplied device
      * This should perhaps go to MI code and/or have $rootdev tested/set by
      * MI code before launching the kernel.
      */
     rootdevname = getenv("rootdev");
     i386_getdev((void **)(&rootdev), rootdevname, NULL);
     if (rootdev == NULL) {		/* bad $rootdev/$currdev */
 	printf("can't determine root device\n");
 	return(EINVAL);
     }
 
     /* Try reading the /etc/fstab file to select the root device */
     getrootmount(i386_fmtdev((void *)rootdev));
 
     if (addr == 0) {
         /* find the last module in the chain (the highest f_addr + f_size of any loaded file) */
         for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
             if (addr < (xp->f_addr + xp->f_size))
                 addr = xp->f_addr + xp->f_size;
         }
     }
     /* pad to a page boundary */
     addr = roundup(addr, PAGE_SIZE);
 
     /* place the metadata before anything */
     module = *modulep = addr;
 
     kfp = file_findfile(NULL, "elf kernel");
     if (kfp == NULL)
       kfp = file_findfile(NULL, "elf64 kernel");
     if (kfp == NULL)
 	panic("can't find kernel file");
     kernend = 0;	/* fill it in later */
     file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
     file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);	/* NOTE(review): envp is not set yet; patched below */
     file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
     file_addmetadata(kfp, MODINFOMD_MODULEP, sizeof module, &module);
     if (add_smap != 0)
         bios_addsmapdata(kfp);
 #ifdef LOADER_GELI_SUPPORT
     geli_export_key_metadata(kfp);
 #endif
 
     size = bi_copymodules64(0);	/* sizing pass: address 0 writes nothing and yields the bytes needed */
 
     /* copy our environment (it starts on the first page boundary after the metadata area) */
     envp = roundup(addr + size, PAGE_SIZE);
     addr = bi_copyenv(envp);
 
     /* set kernend */
     kernend = roundup(addr, PAGE_SIZE);
     *kernendp = kernend;
 
     /* patch MODINFOMD_KERNEND -- NOTE(review): md is not NULL-checked; relies on the file_addmetadata() call above */
     md = file_findmetadata(kfp, MODINFOMD_KERNEND);
     bcopy(&kernend, md->md_data, sizeof kernend);
 
     /* patch MODINFOMD_ENVP (same unchecked lookup as above) */
     md = file_findmetadata(kfp, MODINFOMD_ENVP);
     bcopy(&envp, md->md_data, sizeof envp);
 
     /* copy module list and metadata (the real pass, now that the patched values are final) */
     (void)bi_copymodules64(*modulep);
 
     return(0);
 }
Index: head/sys/amd64/amd64/initcpu.c
===================================================================
--- head/sys/amd64/amd64/initcpu.c	(revision 356939)
+++ head/sys/amd64/amd64/initcpu.c	(revision 356940)
@@ -1,305 +1,307 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) KATO Takenori, 1997, 1998.
  * 
  * All rights reserved.  Unpublished rights reserved under the copyright
  * laws of Japan.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/pcpu.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 static int	hw_instruction_sse;	/* set to 1 by initializecpu() when both XMM and FXSR are present */
 SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
     &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
 static int	lower_sharedpage_init;	/* non-zero once init_amd() has chosen the default below */
 int		hw_lower_amd64_sharedpage;
 SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN,
     &hw_lower_amd64_sharedpage, 0,
    "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory");
 /*
  * -1: automatic (default; CLFLUSH use is turned off when running as a guest)
  *  0: keep CLFLUSH enabled
  *  1: force disable CLFLUSH
  */
 static int	hw_clflush_disable = -1;
 
 static void
 init_amd(void)	/* apply the AMD errata workarounds below and pick the shared-page default */
 {
 	uint64_t msr;	/* scratch value for MSR read-modify-write sequences */
 
 	/*
 	 * Work around Erratum 721 for Family 10h and 12h processors.
 	 * These processors may incorrectly update the stack pointer
 	 * after a long series of push and/or near-call instructions,
 	 * or a long series of pop and/or near-return instructions.
 	 *
 	 * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
 	 * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
 	 *
 	 * Hypervisors do not provide access to the errata MSR,
 	 * causing #GP exception on attempt to apply the errata.  The
 	 * MSR write shall be done on host and persist globally
 	 * anyway, so do not try to do it when under virtualization.
 	 */
 	switch (CPUID_TO_FAMILY(cpu_id)) {
 	case 0x10:
 	case 0x12:
 		if ((cpu_feature2 & CPUID2_HV) == 0)
 			wrmsr(0xc0011029, rdmsr(0xc0011029) | 1);
 		break;
 	}
 
 	/*
 	 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should per BKDG.
 	 * So, do it here or otherwise some tools could be confused by
 	 * Initial Local APIC ID reported with CPUID Function 1 in EBX.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
 		if ((cpu_feature2 & CPUID2_HV) == 0) {
 			msr = rdmsr(MSR_NB_CFG1);
 			msr |= (uint64_t)1 << 54;
 			wrmsr(MSR_NB_CFG1, msr);
 		}
 	}
 
 	/*
 	 * BIOS may configure Family 10h processors to convert WC+ cache type
 	 * to CD.  That can hurt performance of guest VMs using nested paging.
 	 * The relevant MSR bit is not documented in the BKDG,
 	 * the fix is borrowed from Linux.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
 		if ((cpu_feature2 & CPUID2_HV) == 0) {
 			msr = rdmsr(0xc001102a);
 			msr &= ~((uint64_t)1 << 24);
 			wrmsr(0xc001102a, msr);
 		}
 	}
 
 	/*
 	 * Work around Erratum 793: Specific Combination of Writes to Write
 	 * Combined Memory Types and Locked Instructions May Cause Core Hang.
 	 * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors,
 	 * revision 3.04 or later, publication 51810.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) {
 		if ((cpu_feature2 & CPUID2_HV) == 0) {
 			msr = rdmsr(MSR_LS_CFG);
 			msr |= (uint64_t)1 << 15;
 			wrmsr(MSR_LS_CFG, msr);
 		}
 	}
 
 	/* Ryzen errata (family 17h model 1h only, and never under a hypervisor). */
 	if (CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1 &&
 	    (cpu_feature2 & CPUID2_HV) == 0) {
 		/* Erratum 1021 */
 		msr = rdmsr(0xc0011029);
 		msr |= 0x2000;
 		wrmsr(0xc0011029, msr);
 
 		/* Erratum 1033 */
 		msr = rdmsr(MSR_LS_CFG);
 		msr |= 0x10;
 		wrmsr(MSR_LS_CFG, msr);
 
 		/* Erratum 1049 */
 		msr = rdmsr(0xc0011028);
 		msr |= 0x10;
 		wrmsr(0xc0011028, msr);
 
 		/* Erratum 1095 */
 		msr = rdmsr(MSR_LS_CFG);
 		msr |= 0x200000000000000;
 		wrmsr(MSR_LS_CFG, msr);
 	}
 
 	/*
 	 * Work around a problem on Ryzen that is triggered by executing
 	 * code near the top of user memory, in our case the signal
 	 * trampoline code in the shared page on amd64.
 	 *
 	 * This function is executed once for the BSP before tunables take
 	 * effect so the value determined here can be overridden by the
 	 * tunable.  This function is then executed again for each AP and
 	 * also on resume.  Set a flag the first time so that value set by
 	 * the tunable is not overwritten.
 	 *
 	 * The stepping and/or microcode versions should be checked after
 	 * this issue is fixed by AMD so that we don't use this mode if not
 	 * needed.
 	 */
 	if (lower_sharedpage_init == 0) {
 		lower_sharedpage_init = 1;
-		if (CPUID_TO_FAMILY(cpu_id) == 0x17) {
+		if (CPUID_TO_FAMILY(cpu_id) == 0x17 ||
+		    CPUID_TO_FAMILY(cpu_id) == 0x18) {
 			hw_lower_amd64_sharedpage = 1;
 		}
 	}
 }
 
 /*
  * Initialize special VIA features: probe CPUID leaf 0xc0000001 and enable the RNG and PadLock units it reports.
  */
 static void
 init_via(void)
 {
 	u_int regs[4], val;	/* val holds regs[3] of leaf 0xc0000001, tested against the VIA_CPUID_* bits */
 
 	/*
 	 * Check extended CPUID for PadLock features.
 	 *
 	 * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf
 	 */
 	do_cpuid(0xc0000000, regs);
 	if (regs[0] >= 0xc0000001) {
 		do_cpuid(0xc0000001, regs);
 		val = regs[3];
 	} else
 		return;		/* leaf 0xc0000001 is not available: nothing to enable */
 
 	/* Enable RNG if present. */
 	if ((val & VIA_CPUID_HAS_RNG) != 0) {
 		via_feature_rng = VIA_HAS_RNG;
 		wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG);
 	}
 
 	/* Enable PadLock if present; each reported unit is recorded in via_feature_xcrypt. */
 	if ((val & VIA_CPUID_HAS_ACE) != 0)
 		via_feature_xcrypt |= VIA_HAS_AES;
 	if ((val & VIA_CPUID_HAS_ACE2) != 0)
 		via_feature_xcrypt |= VIA_HAS_AESCTR;
 	if ((val & VIA_CPUID_HAS_PHE) != 0)
 		via_feature_xcrypt |= VIA_HAS_SHA;
 	if ((val & VIA_CPUID_HAS_PMM) != 0)
 		via_feature_xcrypt |= VIA_HAS_MM;
 	if (via_feature_xcrypt != 0)	/* at least one unit found: set bit 28 of MSR 0x1107 */
 		wrmsr(0x1107, rdmsr(0x1107) | (1 << 28));
 }
 
 /*
  * Initialize CPU control registers: build and load %cr4 from the detected features, enable NX on the BSP, then run the vendor-specific setup.
  */
 void
 initializecpu(void)
 {
 	uint64_t msr;
 	uint32_t cr4;
 
 	cr4 = rcr4();	/* start from the current value and only add bits */
 	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
 		cr4 |= CR4_FXSR | CR4_XMM;
 		cpu_fxsr = hw_instruction_sse = 1;
 	}
 	if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
 		cr4 |= CR4_FSGSBASE;
 
 	if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
 		cr4 |= CR4_PKE;
 
 	/*
 	 * Postpone enabling the SMEP on the boot CPU until the page
 	 * tables are switched from the boot loader identity mapping
 	 * to the kernel tables.  The boot loader enables the U bit in
 	 * its tables.
 	 */
 	if (!IS_BSP()) {
 		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
 			cr4 |= CR4_SMEP;
 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
 			cr4 |= CR4_SMAP;
 	}
 	load_cr4(cr4);
 	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
 		msr = rdmsr(MSR_EFER) | EFER_NXE;
 		wrmsr(MSR_EFER, msr);
 		pg_nx = PG_NX;	/* set here only, i.e. on the BSP when NX is reported */
 	}
 	hw_ibrs_recalculate();
 	hw_ssb_recalculate(false);
 	amd64_syscall_ret_flush_l1d_recalc();
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_AMD:
+	case CPU_VENDOR_HYGON:
 		init_amd();
 		break;
 	case CPU_VENDOR_CENTAUR:
 		init_via();
 		break;
 	}
 
 	if ((amd_feature & AMDID_RDTSCP) != 0 ||
 	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
 		wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));	/* store this CPU's id in TSC_AUX when RDTSCP or RDPID exists */
 }
 
 void
 initializecpucache(void)	/* record the CLFLUSH line size and decide whether CLFLUSH{,OPT} may be used */
 {
 
 	/*
 	 * CPUID with %eax = 1, %ebx returns
 	 * Bits 15-8: CLFLUSH line size
 	 * 	(Value * 8 = cache line size in bytes)
 	 */
 	if ((cpu_feature & CPUID_CLFSH) != 0)
 		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
 	/*
 	 * XXXKIB: (temporary) hack to work around traps generated
 	 * when CLFLUSHing APIC register window under virtualization
 	 * environments.  These environments tend to disable the
 	 * CPUID_SS feature even though the native CPU supports it.
 	 */
 	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
 	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {	/* automatic mode while running as a guest */
 		cpu_feature &= ~CPUID_CLFSH;
 		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
 	}
 
 	/*
 	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
 	 * by setting the hw.clflush_disable tunable.
 	 */
 	if (hw_clflush_disable == 1) {
 		cpu_feature &= ~CPUID_CLFSH;
 		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
 	}
 }
Index: head/sys/i386/i386/machdep.c
===================================================================
--- head/sys/i386/i386/machdep.c	(revision 356939)
+++ head/sys/i386/i386/machdep.c	(revision 356940)
@@ -1,3254 +1,3255 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2018 The FreeBSD Foundation
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Portions of this software were developed by A. Joseph Koshy under
  * sponsorship from the FreeBSD Foundation and Google, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_apic.h"
 #include "opt_atpic.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_isa.h"
 #include "opt_kstack_pages.h"
 #include "opt_maxmem.h"
 #include "opt_mp_watchdog.h"
 #include "opt_perfmon.h"
 #include "opt_platform.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_param.h>
 #include <vm/vm_phys.h>
 
 #ifdef DDB
 #ifndef KDB
 #error KDB must be enabled in order for DDB to work!
 #endif
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 #endif
 
 #include <isa/rtc.h>
 
 #include <net/netisr.h>
 
 #include <machine/bootinfo.h>
 #include <machine/clock.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/intr_machdep.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/mp_watchdog.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
 #include <machine/proc.h>
 #include <machine/reg.h>
 #include <machine/sigframe.h>
 #include <machine/specialreg.h>
 #include <machine/sysarch.h>
 #include <machine/trap.h>
 #include <x86/ucode.h>
 #include <machine/vm86.h>
 #include <x86/init.h>
 #ifdef PERFMON
 #include <machine/perfmon.h>
 #endif
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #ifdef FDT
 #include <x86/fdt.h>
 #endif
 
 #ifdef DEV_APIC
 #include <x86/apicvar.h>
 #endif
 
 #ifdef DEV_ISA
 #include <x86/isa/icu.h>
 #endif
 
 /* Sanity check for __curthread(): pc_curthread must be the first member of struct pcpu */
 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
 
 register_t init386(int first);
 void dblfault_handler(void);
 void identify_cpu(void);
 
 static void cpu_startup(void *);
 static void fpstate_drop(struct thread *td);
 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
     char *xfpusave, size_t xfpusave_len);
 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
     char *xfpustate, size_t xfpustate_len);
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 /* Intel ICH registers (I/O port numbers used with inl()/outl() in cpu_startup()) */
 #define ICH_PMBASE	0x400
 #define ICH_SMI_EN	ICH_PMBASE + 0x30	/* NOTE(review): unparenthesized expansion; safe only as a whole expression */
 
 int	_udatasel, _ucodesel;
 u_int	basemem;
 static int above4g_allow = 1;
 static int above24g_allow = 0;
 
 int cold = 1;
 
 #ifdef COMPAT_43
 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
 #endif
 #ifdef COMPAT_FREEBSD4
 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
 #endif
 
 long Maxmem = 0;
 long realmem = 0;	/* set by cpu_startup() to atop() of the reported memory size */
 
 #ifdef PAE
 FEATURE(pae, "Physical Address Extensions");
 #endif
 
 struct kva_md_info kmi;
 
 static struct trapframe proc0_tf;
 struct pcpu __pcpu[MAXCPU];
 
 struct mtx icu_lock;
 
 struct mem_range_softc mem_range_softc;
 
 extern char start_exceptions[], end_exceptions[];
 
 extern struct sysentvec elf32_freebsd_sysvec;
 
 /* Default init_ops implementation. */
 struct init_ops init_ops = {
 	.early_clock_source_init =	i8254_init,
 	.early_delay =			i8254_delay,
 #ifdef DEV_APIC
 	.msi_init =			msi_init,
 #endif
 };
 
 static void
 cpu_startup(dummy)	/* SYSINIT hook: start the clock, report CPU and memory, set up buffers */
 	void *dummy;	/* not referenced in the body; the SYSINIT registration passes NULL */
 {
 	uintmax_t memsize;
 	char *sysenv;
 
 	/*
 	 * On MacBooks, we need to disallow the legacy USB circuit to
 	 * generate an SMI# because this can cause several problems,
 	 * namely: incorrect CPU frequency detection and failure to
 	 * start the APs.
 	 * We do this by disabling a bit in the SMI_EN (SMI Control and
 	 * Enable register) of the Intel ICH LPC Interface Bridge.
 	 */
 	sysenv = kern_getenv("smbios.system.product");
 	if (sysenv != NULL) {
 		if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
 		    strncmp(sysenv, "MacBook3,1", 10) == 0 ||
 		    strncmp(sysenv, "MacBook4,1", 10) == 0 ||
 		    strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
 		    strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
 		    strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
 		    strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
 		    strncmp(sysenv, "Macmini1,1", 10) == 0) {
 			if (bootverbose)
 				printf("Disabling LEGACY_USB_EN bit on "
 				    "Intel ICH.\n");
 			outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);	/* clear bit 3 of SMI_EN */
 		}
 		freeenv(sysenv);
 	}
 
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
 #ifdef PERFMON
 	perfmon_init();
 #endif
 
 	/*
 	 * Display physical memory if SMBIOS reports reasonable amount.
 	 */
 	memsize = 0;
 	sysenv = kern_getenv("smbios.memory.enabled");
 	if (sysenv != NULL) {
 		memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;	/* presumably KB -> bytes; TODO confirm unit */
 		freeenv(sysenv);
 	}
 	if (memsize < ptoa((uintmax_t)vm_free_count()))	/* missing or smaller than free memory: use Maxmem instead */
 		memsize = ptoa((uintmax_t)Maxmem);
 	printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
 	realmem = atop(memsize);
 
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {	/* (start, end) pairs; a zero end stops the walk */
 			vm_paddr_t size;
 
 			size = phys_avail[indx + 1] - phys_avail[indx];
 			printf(
 			    "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
 			    (uintmax_t)phys_avail[indx],
 			    (uintmax_t)phys_avail[indx + 1] - 1,
 			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
 		}
 	}
 
 	vm_ksubmap_init(&kmi);
 
 	printf("avail memory = %ju (%ju MB)\n",
 	    ptoa((uintmax_t)vm_free_count()),
 	    ptoa((uintmax_t)vm_free_count()) / 1048576);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 	cpu_setregs();
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by call
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.  (osendsig: COMPAT_43 variant using struct osigframe.)
  */
 #ifdef COMPAT_43
 static void
 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct osigframe sf, *fp;	/* sf is built in the kernel, then copied out to the user address fp */
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);	/* presumably: already running on the alternate signal stack? */
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43)	/* always true here: the whole function is inside #ifdef COMPAT_43 */
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct osigframe *)regs->tf_esp - 1;	/* one frame below the current user stack pointer */
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = ksi->ksi_code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 		sf.sf_addr = 0;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = ksi->ksi_code;
 		sf.sf_addr = (register_t)ksi->ksi_addr;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	mtx_unlock(&psp->ps_mtx);	/* both locks are dropped here and re-acquired at the end of the function */
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	sf.sf_siginfo.si_sc.sc_gs = rgs();	/* %gs is read with rgs() rather than taken from the trap frame */
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)	/* without VME, VIF/VIP come from the saved vm86_eflags */
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
 	}
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {	/* the frame could not be written to user memory */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	regs->tf_esp = (int)fp;	/* the user stack pointer now points at the new frame */
 	if (p->p_sysent->sv_sigcode_base != 0) {
 		regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
 		    szosigcode;
 	} else {
 		/* a.out sysentvec does not use shared page */
 		regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode;
 	}
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 	PROC_LOCK(p);	/* return with both locks held again, as asserted on entry */
 	mtx_lock(&psp->ps_mtx);
 }
 #endif /* COMPAT_43 */
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Deliver a signal to the current thread using the FreeBSD 4.x signal
  * frame layout (struct sigframe4).
  *
  * The frame is assembled in the local `sf', copied out to the location
  * `sfp' on the user stack (or on the alternate signal stack), and the
  * trapframe is then rewritten so the thread resumes at
  * sv_sigcode_base + szsigcode - szfreebsd4_sigcode with flat user
  * segment selectors.
  *
  * catcher: handler address to store in the frame.
  * ksi:     signal information (ksi_signo, ksi_code, ksi_addr are used).
  * mask:    signal mask saved into the frame's ucontext.
  *
  * Entered with the process lock and ps_mtx held (both are asserted).
  * Both are released before the copyout and re-acquired before return.
  */
 static void
 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct sigframe4 sf, *sfp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	/* Remember whether the interrupted code was on the signal stack. */
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = td->td_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	/* %gs is read from the CPU; the rest comes from the trapframe. */
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/* Copies the whole trapframe into the mcontext starting at mc_fs. */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 	bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
 	bzero(sf.sf_uc.uc_mcontext.__spare__,
 	    sizeof(sf.sf_uc.uc_mcontext.__spare__));
 	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct sigframe4));
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* One sigframe4 directly below the interrupted %esp. */
 		sfp = (struct sigframe4 *)regs->tf_esp - 1;
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	bzero(&sf.sf_si, sizeof(sf.sf_si));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill in POSIX parts */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = ksi->ksi_code;
 		sf.sf_si.si_addr = ksi->ksi_addr;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = ksi->ksi_code;
 		sf.sf_addr = (register_t)ksi->ksi_addr;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	/* Drop both locks before touching user memory below. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP come from the software copy. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
 	}
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/* The frame could not be written: exit with SIGILL. */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Redirect the thread to the handler trampoline on the new frame. */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
 	    szfreebsd4_sigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Re-acquire the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 /*
  * Deliver a signal to the current thread.
  *
  * Signals registered in ps_freebsd4 or ps_osigset are handed to
  * freebsd4_sendsig() / osendsig() when those compat options are built.
  * Otherwise a struct sigframe is assembled in the local `sf', optionally
  * followed by extended FPU state, copied out to the user stack (or the
  * alternate signal stack), and the trapframe is rewritten so the thread
  * resumes in the signal trampoline with flat user segment selectors.
  *
  * catcher: handler address to store in the frame.
  * ksi:     signal information for the handler.
  * mask:    signal mask saved into the frame's ucontext.
  *
  * Entered with the process lock and ps_mtx held (both are asserted).
  * Both are released before the copyout and re-acquired before return.
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct sigframe sf, *sfp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	char *sp;
 	struct trapframe *regs;
 	struct segment_descriptor *sdp;
 	char *xfpusave;
 	size_t xfpusave_len;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 #ifdef COMPAT_FREEBSD4
 	if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
 		freebsd4_sendsig(catcher, ksi, mask);
 		return;
 	}
 #endif
 #ifdef COMPAT_43
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		osendsig(catcher, ksi, mask);
 		return;
 	}
 #endif
 	regs = td->td_frame;
 	/* Remember whether the interrupted code was on the signal stack. */
 	oonstack = sigonstack(regs->tf_esp);
 
 	/*
 	 * When xsave is in use and the CPU's extended state is larger than
 	 * union savefpu, stage the excess in a kernel-stack buffer.
 	 */
 	if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
 		xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
 		xfpusave = __builtin_alloca(xfpusave_len);
 	} else {
 		xfpusave_len = 0;
 		xfpusave = NULL;
 	}
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = td->td_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	/* %gs is read from the CPU; the rest comes from the trapframe. */
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/* Copies the whole trapframe into the mcontext starting at mc_fs. */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
 	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
 	fpstate_drop(td);
 	/*
 	 * Unconditionally fill the fsbase and gsbase into the mcontext.
 	 * The base is reassembled from the split sd_hibase/sd_lobase
 	 * descriptor fields.
 	 * NOTE(review): sd_hibase is shifted left by 24 after integer
 	 * promotion; confirm the field type cannot shift into a sign bit.
 	 */
 	sdp = &td->td_pcb->pcb_fsd;
 	sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
 	    sdp->sd_lobase;
 	sdp = &td->td_pcb->pcb_gsd;
 	sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
 	    sdp->sd_lobase;
 	bzero(sf.sf_uc.uc_mcontext.mc_spare2,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Start from the top of the alternate signal stack. */
 		sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/*
 		 * Start 128 bytes below the interrupted %esp (presumably to
 		 * stay clear of data kept just below it -- TODO confirm).
 		 */
 		sp = (char *)regs->tf_esp - 128;
 	if (xfpusave != NULL) {
 		/* Reserve the extended FPU area, rounded down to 64 bytes. */
 		sp -= xfpusave_len;
 		sp = (char *)((unsigned int)sp & ~0x3F);
 		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
 	}
 	sp -= sizeof(struct sigframe);
 
 	/* Align to 16 bytes. */
 	sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	bzero(&sf.sf_si, sizeof(sf.sf_si));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill in POSIX parts */
 		sf.sf_si = ksi->ksi_info;
 		sf.sf_si.si_signo = sig; /* maybe a translated signal */
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = ksi->ksi_code;
 		sf.sf_addr = (register_t)ksi->ksi_addr;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	/* Drop both locks before touching user memory below. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP come from the software copy. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
 	}
 
 	/*
 	 * Copy the sigframe out to the user's stack, followed by the
 	 * extended FPU state when there is any.  A failure of either copy
 	 * leads to sigexit() with SIGILL.
 	 */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
 	    (xfpusave != NULL && copyout(xfpusave,
 	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
 	    != 0)) {
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Redirect the thread to the handler trampoline on the new frame. */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = p->p_sysent->sv_sigcode_base;
 	/* No sigcode base: fall back to the code just below ps_strings. */
 	if (regs->tf_eip == 0)
 		regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Re-acquire the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  *
  * MPSAFE
  */
 #ifdef COMPAT_43
 /*
  * Return from a signal handler whose frame uses the old osigcontext
  * layout (COMPAT_43).
  *
  * td:  calling thread; its trapframe is rewritten from the saved context.
  * uap: system call arguments; uap->sigcntxp is the user address of the
  *      struct osigcontext to restore.
  *
  * Returns the copyin() error if the context cannot be read, EINVAL if the
  * context asks for vm86 mode without an initialised vm86 area, tries to
  * change protected eflags bits, or names a %cs that fails CS_SECURE()
  * (SIGBUS is posted as well in that last case), and EJUSTRETURN once the
  * registers, signal-stack flag and signal mask have been restored.
  */
 int
 osigreturn(struct thread *td, struct osigreturn_args *uap)
 {
 	struct osigcontext sc;
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags, error;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 	/* Work on a kernel copy so user space cannot change it under us. */
 	error = copyin(uap->sigcntxp, &sc, sizeof(sc));
 	if (error != 0)
 		return (error);
 	scp = &sc;
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == NULL or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (td->td_pcb->pcb_ext == NULL)
 			return (EINVAL);
 		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 		}
 
 		/*
 		 * Only the user-changeable eflags bits are taken from the
 		 * saved context; everything else keeps its current value.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_trapno = T_PROTFLT;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	regs->tf_eflags = eflags;
 
 #if defined(COMPAT_43)
 	/* Bit 0 of sc_onstack records whether we were on the signal stack. */
 	if (scp->sc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 	kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
 	    SIGPROCMASK_OLD);
 	return (EJUSTRETURN);
 }
 #endif /* COMPAT_43 */
 
 #ifdef COMPAT_FREEBSD4
 /*
  * MPSAFE
  */
 int
 freebsd4_sigreturn(td, uap)
 	struct thread *td;
 	struct freebsd4_sigreturn_args /* {
 		const ucontext4 *sigcntxp;
 	} */ *uap;
 {
 	struct ucontext4 uc;
 	struct trapframe *regs;
 	struct ucontext4 *ucp;
 	int cs, eflags, error;
 	ksiginfo_t ksi;
 
 	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 	ucp = &uc;
 	regs = td->td_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (td->td_pcb->pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 		}
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, cs);
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_trapno = T_PROTFLT;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 			return (EINVAL);
 		}
 
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 #if defined(COMPAT_43)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 	return (EJUSTRETURN);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 /*
  * Return from a signal handler: restore the register state, FPU state,
  * signal-stack flag and signal mask saved in a ucontext_t.
  *
  * td:  calling thread; its trapframe is rewritten from the saved context.
  * uap: system call arguments; uap->sigcntxp is the user address of the
  *      ucontext_t to restore.
  *
  * Returns the copyin() error if the context (or its extended FPU state)
  * cannot be read, EINVAL if mc_flags has bits outside _MC_FLAG_MASK, if
  * vm86 mode is requested without an initialised vm86 area, if protected
  * eflags bits would change, if %cs fails CS_SECURE() (SIGBUS is posted as
  * well), or if the extended FPU state is larger than this CPU supports;
  * the set_fpcontext() error if that fails; and EJUSTRETURN on success.
  *
  * MPSAFE
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	struct proc *p;
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	char *xfpustate;
 	size_t xfpustate_len;
 	int cs, eflags, error, ret;
 	ksiginfo_t ksi;
 
 	p = td->td_proc;
 
 	/* Work on a kernel copy so user space cannot change it under us. */
 	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 	ucp = &uc;
 	if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
 		uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
 		    td->td_name, ucp->uc_mcontext.mc_flags);
 		return (EINVAL);
 	}
 	regs = td->td_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == NULL or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (td->td_pcb->pcb_ext == NULL)
 			return (EINVAL);
 		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 		}
 
 		/*
 		 * Only the user-changeable eflags bits are taken from the
 		 * saved context; everything else keeps its current value.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * Load the saved registers, then move the segment values
 		 * into the vm86 slots and put flat user selectors in the
 		 * ordinary ones.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, eflags);
 			return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, cs);
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_trapno = T_PROTFLT;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 			return (EINVAL);
 		}
 
 		if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 			/*
 			 * The length comes from user space; bound it by what
 			 * this CPU can hold beyond union savefpu before it is
 			 * used to size a kernel-stack buffer.
 			 */
 			xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 			if (xfpustate_len > cpu_max_ext_state_size -
 			    sizeof(union savefpu)) {
 				uprintf(
 				    "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 				    p->p_pid, td->td_name, xfpustate_len);
 				return (EINVAL);
 			}
 			xfpustate = __builtin_alloca(xfpustate_len);
 			error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
 			    xfpustate, xfpustate_len);
 			if (error != 0) {
 				uprintf(
 				    "pid %d (%s): sigreturn copying xfpustate failed\n",
 				    p->p_pid, td->td_name);
 				return (error);
 			}
 		} else {
 			xfpustate = NULL;
 			xfpustate_len = 0;
 		}
 		/* Restore FPU state first; registers only if that worked. */
 		ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
 		    xfpustate_len);
 		if (ret != 0)
 			return (ret);
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 #if defined(COMPAT_43)
 	/* Bit 0 of mc_onstack records whether we were on the signal stack. */
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 	return (EJUSTRETURN);
 }
 
 #ifdef COMPAT_43
 static void
 setup_priv_lcall_gate(struct proc *p)
 {
 	struct i386_ldt_args uap;
 	union descriptor desc;
 	u_int lcall_addr;
 
 	bzero(&uap, sizeof(uap));
 	uap.start = 0;
 	uap.num = 1;
 	lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp;
 	bzero(&desc, sizeof(desc));
 	desc.sd.sd_type = SDT_MEMERA;
 	desc.sd.sd_dpl = SEL_UPL;
 	desc.sd.sd_p = 1;
 	desc.sd.sd_def32 = 1;
 	desc.sd.sd_gran = 1;
 	desc.sd.sd_lolimit = 0xffff;
 	desc.sd.sd_hilimit = 0xf;
 	desc.sd.sd_lobase = lcall_addr;
 	desc.sd.sd_hibase = lcall_addr >> 24;
 	i386_set_ldt(curthread, &uap, &desc);
 }
 #endif
 
 /*
  * Put the thread's user-visible register state into its default
  * configuration for a newly exec'd image.
  *
  * td:    thread whose trapframe and pcb are reset.
  * imgp:  image parameters; entry_addr becomes %eip and ps_strings is
  *        passed in %ebx.
  * stack: initial user stack pointer.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *tf;
 	struct pcb *pcb;
 	register_t trace_bit;
 
 	pcb = td->td_pcb;
 	tf = td->td_frame;
 
 	/* Reset pcb_gs and the live %gs before possibly invalidating it. */
 	pcb->pcb_gs = _udatasel;
 	load_gs(_udatasel);
 
 	/*
 	 * Free a per-process LDT if there is one.  dt_lock is only
 	 * released here on the path that does not call user_ldt_free().
 	 */
 	mtx_lock_spin(&dt_lock);
 	if (td->td_proc->p_md.md_ldt == NULL)
 		mtx_unlock_spin(&dt_lock);
 	else
 		user_ldt_free(td);
 
 #ifdef COMPAT_43
 	/* Images whose ps_strings differs from the ELF sysvec get the gate. */
 	if (elf32_freebsd_sysvec.sv_psstrings !=
 	    td->td_proc->p_sysent->sv_psstrings)
 		setup_priv_lcall_gate(td->td_proc);
 #endif
 
 	/*
 	 * Reset the fs and gs bases.  The values from the old address
 	 * space do not make sense for the new program.  In particular,
 	 * gsbase might be the TLS base for the old program but the new
 	 * program has no TLS now.
 	 */
 	set_fsbase(td, 0);
 	set_gsbase(td, 0);
 
 	/*
 	 * Start from an all-zero trapframe, keeping only the trace flag.
 	 * Make sure edx is 0x0 on entry. Linux binaries depend on it.
 	 */
 	trace_bit = tf->tf_eflags & PSL_T;
 	bzero(tf, sizeof(*tf));
 	tf->tf_cs = _ucodesel;
 	tf->tf_ds = _udatasel;
 	tf->tf_es = _udatasel;
 	tf->tf_fs = _udatasel;
 	tf->tf_ss = _udatasel;
 	tf->tf_eflags = PSL_USER | trace_bit;
 	tf->tf_esp = stack;
 	tf->tf_eip = imgp->entry_addr;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	tf->tf_ebx = imgp->ps_strings;
 
 	/*
 	 * Reset the hardware debug registers if they were in use.
 	 * They won't have any meaning for the newly exec'd process.
 	 */
 	if ((pcb->pcb_flags & PCB_DBREGS) != 0) {
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 		if (pcb == curpcb) {
 			/*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 			reset_dbregs();
 		}
 		pcb->pcb_flags &= ~PCB_DBREGS;
 	}
 
 	pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 
 	/*
 	 * Drop the FP state if we hold it, so that the process gets a
 	 * clean FP state if it uses the FPU again.
 	 */
 	fpstate_drop(td);
 }
 
 /*
  * Turn on the CR0 bits the kernel runs with and load the user data
  * selector into %gs.
  */
 void
 cpu_setregs(void)
 {
 
 	/*
 	 * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
 	 *
 	 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
 	 * instructions.  We must set the CR0_MP bit and use the CR0_TS
 	 * bit to control the trap, because setting the CR0_EM bit does
 	 * not cause WAIT instructions to trap.  It's important to trap
 	 * WAIT instructions - otherwise the "wait" variants of no-wait
 	 * control instructions would degenerate to the "no-wait" variants
 	 * after FP context switches but work correctly otherwise.  It's
 	 * particularly important to trap WAITs when there is no NPX -
 	 * otherwise the "wait" variants would always degenerate.
 	 *
 	 * Try setting CR0_NE to get correct error reporting on 486DX's.
 	 * Setting it should fail or do nothing on lesser processors.
 	 *
 	 * CR0_WP and CR0_AM are turned on by the same write.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM);
 	load_gs(_udatasel);
 }
 
 /* Exported read-only through the machdep.guessed_bootdev sysctl. */
 u_long bootdev;		/* not a struct cdev *- encoding is different */
 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
 	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
 
 /* Exported read-only through the machdep.bootmethod sysctl. */
 static char bootmethod[16] = "BIOS";
 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
     "System firmware boot method");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  *
  * Storage for the descriptor tables and the state that goes with them.
  */
 
 int _default_ldt;
 
 struct mtx dt_lock;			/* lock for GDT and LDT */
 
 union descriptor gdt0[NGDT];	/* initial global descriptor table */
 union descriptor *gdt = gdt0;	/* global descriptor table */
 
 union descriptor *ldt;		/* local descriptor table */
 
 /* The IDT pointer starts out referring to the static idt0 array. */
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 
 /* Presumably the TSS and stack used for double faults -- TODO confirm. */
 static struct i386tss *dblfault_tss;
 static char *dblfault_stack;
 
 static struct i386tss common_tss0;
 
 vm_offset_t proc0kstack;
 
 /*
  * Software ("soft") descriptions of the global descriptor table entries,
  * one initializer per selector, in selector order.
  *
  * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
  * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	.ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0,
 	.ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 /* GPRIV_SEL	1 SMP Per-Processor Private Data Descriptor */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GUFS_SEL	2 %fs Descriptor for user */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GUGS_SEL	3 %gs Descriptor for user */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GCODE_SEL	4 Code Descriptor for kernel */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA,
 	.ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GDATA_SEL	5 Data Descriptor for kernel */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GUCODE_SEL	6 Code Descriptor for user */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GUDATA_SEL	7 Data Descriptor for user */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	.ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GPROC0_SEL	9 Proc 0 Tss Descriptor */
 {	.ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1,
 	.ssd_type = SDT_SYS386TSS,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 /* GLDT_SEL	10 LDT Descriptor */
 {	.ssd_base = 0, .ssd_limit = sizeof(union descriptor) * NLDT - 1,
 	.ssd_type = SDT_SYSLDT,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 /* GUSERLDT_SEL	11 User LDT Descriptor per process */
 {	.ssd_base = 0, .ssd_limit = (512 * sizeof(union descriptor)-1),
 	.ssd_type = SDT_SYSLDT,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 /* GPANIC_SEL	12 Panic Tss Descriptor */
 {	.ssd_base = 0, .ssd_limit = sizeof(struct i386tss)-1,
 	.ssd_type = SDT_SYS386TSS,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
 {	.ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 1 },
 /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
 {	.ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 1 },
 /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
 {	.ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
 {	.ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 1 },
 /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
 {	.ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 1 },
 /* GNDIS_SEL	18 NDIS Descriptor */
 {	.ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0,
 	.ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 };
 
 /*
  * Software descriptions of the six default local descriptor table
  * entries: four empty slots plus flat user code and user data segments.
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	.ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0,
 	.ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 	/* Null Descriptor - overwritten by call gate */
 {	.ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0,
 	.ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 	/* Null Descriptor - overwritten by call gate */
 {	.ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0,
 	.ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 	/* Code Descriptor for user */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 	/* Null Descriptor - overwritten by call gate */
 {	.ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0,
 	.ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 0, .ssd_gran = 0 },
 	/* Data Descriptor for user */
 {	.ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA,
 	.ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0,
 	.ssd_def32 = 1, .ssd_gran = 1 },
 };
 
 /* Displacement added to every non-NULL handler address by setidt(). */
 uintptr_t setidt_disp;
 
 /*
  * Install handler `func' in IDT slot `idx'.
  *
  * A non-NULL handler address is offset by setidt_disp before being
  * stored; a NULL handler is stored as offset 0.  `typ', `dpl' and
  * `selec' are passed through to setidt_nodisp() unchanged.
  */
 void
 setidt(int idx, inthand_t *func, int typ, int dpl, int selec)
 {
 	uintptr_t target;
 
 	if (func == NULL)
 		target = 0;
 	else
 		target = (uintptr_t)func + setidt_disp;
 	setidt_nodisp(idx, target, typ, dpl, selec);
 }
 
 void
 setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec)
 {
 	struct gate_descriptor *ip;
 
 	ip = idt + idx;
 	ip->gd_looffset = off;
 	ip->gd_selector = selec;
 	ip->gd_stkcpy = 0;
 	ip->gd_xx = 0;
 	ip->gd_type = typ;
 	ip->gd_dpl = dpl;
 	ip->gd_p = 1;
 	ip->gd_hioffset = ((u_int)off) >> 16 ;
 }
 
 /*
  * Entry points named through IDTVEC() and defined outside this file.
  * The dtrace_ret and xen_intr_upcall vectors are only declared when
  * KDTRACE_HOOKS / XENHVM are configured.
  */
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm),
 #ifdef KDTRACE_HOOKS
 	IDTVEC(dtrace_ret),
 #endif
 #ifdef XENHVM
 	IDTVEC(xen_intr_upcall),
 #endif
 	IDTVEC(int0x80_syscall);
 
 #ifdef DDB
 /*
  * DDB show command: walk the IDT and print the index and handler symbol
  * of every entry that does not point at the default 'rsvd' entry point.
  * Task gates are printed as <TASK>.  Handler addresses at or above
  * PMAP_TRM_MIN_ADDRESS have setidt_disp subtracted before the symbol
  * lookup, and the unadjusted address is printed alongside.
  */
 DB_SHOW_COMMAND(idt, db_show_idt)
 {
 	struct gate_descriptor *ip;
 	uintptr_t func, func_trm;
 	int idx;
 	bool trm;
 
 	for (idx = 0, ip = idt; idx < NIDT && !db_pager_quit; idx++, ip++) {
 		if (ip->gd_type == SDT_SYSTASKGT) {
 			db_printf("%3d\t<TASK>\n", idx);
 			continue;
 		}
 
 		/* Reassemble the handler address from its two halves. */
 		func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 		func_trm = func;
 		trm = func >= PMAP_TRM_MIN_ADDRESS;
 		if (trm)
 			func -= setidt_disp;
 
 		/* Slots still holding the default handler are not listed. */
 		if (func == (uintptr_t)&IDTVEC(rsvd))
 			continue;
 
 		db_printf("%3d\t", idx);
 		db_printsym(func, DB_STGY_PROC);
 		if (trm)
 			db_printf(" (trampoline %#x)",
 			    func_trm);
 		db_printf("\n");
 	}
 }
 
 /*
  * DDB show command: print privileged registers.
  *
  * IDTR, GDTR, LDTR, TR and CR0/CR2/CR3/CR4 are always printed.  XCR0 and
  * the EFER, FEATURES_CTL, DEBUG_CTL and PAT MSRs are each printed only
  * when the CR4 bit, CPUID feature bit or vendor/family test guarding them
  * passes.
  */
 DB_SHOW_COMMAND(sysregs, db_show_sysregs)
 {
 	uint64_t idtr, gdtr;
 
 	/*
 	 * The table registers are printed as (value >> 16) / (low 16 bits),
 	 * presumably base/limit.
 	 */
 	idtr = ridt();
 	db_printf("idtr\t0x%08x/%04x\n",
 	    (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
 	gdtr = rgdt();
 	db_printf("gdtr\t0x%08x/%04x\n",
 	    (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
 	db_printf("ldtr\t0x%04x\n", rldt());
 	db_printf("tr\t0x%04x\n", rtr());
 	db_printf("cr0\t0x%08x\n", rcr0());
 	db_printf("cr2\t0x%08x\n", rcr2());
 	db_printf("cr3\t0x%08x\n", rcr3());
 	db_printf("cr4\t0x%08x\n", rcr4());
 	if (rcr4() & CR4_XSAVE)
 		db_printf("xcr0\t0x%016llx\n", rxcr(0));
 	if (amd_feature & (AMDID_NX | AMDID_LM))
 		db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
 	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
 		db_printf("FEATURES_CTL\t0x%016llx\n",
 		    rdmsr(MSR_IA32_FEATURE_CONTROL));
 	/* DEBUG_CTL: Intel/AMD family >= 6, or (added lines) any Hygon CPU. */
-	if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
-	    cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6)
+	if (((cpu_vendor_id == CPU_VENDOR_INTEL ||
+	    cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) ||
+	    cpu_vendor_id == CPU_VENDOR_HYGON)
 		db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
 	if (cpu_feature & CPUID_PAT)
 		db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
 }
 
 /*
  * Show the debug registers: prints dr0-dr3, dr6 and dr7, one per line,
  * each obtained from the matching rdrN() accessor.
  */
 DB_SHOW_COMMAND(dbregs, db_show_dbregs)
 {
 
 	db_printf("dr0\t0x%08x\n", rdr0());
 	db_printf("dr1\t0x%08x\n", rdr1());
 	db_printf("dr2\t0x%08x\n", rdr2());
 	db_printf("dr3\t0x%08x\n", rdr3());
 	db_printf("dr6\t0x%08x\n", rdr6());
 	db_printf("dr7\t0x%08x\n", rdr7());	
 }
 
 /*
  * Dump a trapframe: the one at the address supplied with the command,
  * or the current thread's td_frame when no address was given.
  */
 DB_SHOW_COMMAND(frame, db_show_frame)
 {
 	struct trapframe *frame;
 
 	frame = have_addr ? (struct trapframe *)addr : curthread->td_frame;
 	/*
 	 * Emit through db_printf() like the other show commands in this
 	 * file, so the output goes through the debugger's own output path
 	 * rather than the general kernel printf().
 	 */
 	db_printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n",
 	    frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs,
 	    frame->tf_eip);
 	db_printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno);
 	db_printf("ds %#x es %#x fs %#x\n",
 	    frame->tf_ds, frame->tf_es, frame->tf_fs);
 	db_printf("eax %#x ecx %#x edx %#x ebx %#x\n",
 	    frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx);
 	db_printf("ebp %#x esi %#x edi %#x\n",
 	    frame->tf_ebp, frame->tf_esi, frame->tf_edi);
 }
 #endif
 
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 	ssd->ssd_type  = sd->sd_type;
 	ssd->ssd_dpl   = sd->sd_dpl;
 	ssd->ssd_p     = sd->sd_p;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_gran  = sd->sd_gran;
 }
 
 /*
  * Add the range [base, base + length) to physmap.
  *
  * physmap holds start/end pairs at even/odd indexes, kept sorted by
  * address; *physmap_idxp is the index of the start of the last pair in
  * use.  The new range is merged into an adjacent pair when it abuts one,
  * otherwise a new pair is inserted at its sorted position.
  *
  * Ranges of zero length, ranges at or above the address limit (4G, or
  * 24G/unlimited depending on pae_mode, above4g_allow and above24g_allow)
  * and ranges overlapping an existing pair are dropped.  A range crossing
  * the limit is truncated to it.
  *
  * Returns 1 when the caller may continue feeding entries (including when
  * this entry was dropped), 0 when physmap has no room for another pair.
  * On a 0 return *physmap_idxp is left at the last valid pair.
  */
 static int
 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
     int *physmap_idxp)
 {
 	uint64_t lim, ign;
 	int i, insert_idx, physmap_idx;
 
 	physmap_idx = *physmap_idxp;
 
 	if (length == 0)
 		return (1);
 
 	lim = 0x100000000;					/*  4G */
 	if (pae_mode && above4g_allow)
 		lim = above24g_allow ? -1ULL : 0x600000000;	/* 24G */
 	if (base >= lim) {
 		printf("%uK of memory above %uGB ignored, pae %d "
 		    "above4g_allow %d above24g_allow %d\n",
 		    (u_int)(length / 1024), (u_int)(lim >> 30), pae_mode,
 		    above4g_allow, above24g_allow);
 		return (1);
 	}
 	/*
 	 * Only truncate (and report) when the range really extends past the
 	 * limit; a range ending exactly at the limit loses nothing.
 	 */
 	if (base + length > lim) {
 		ign = base + length - lim;
 		length -= ign;
 		printf("%uK of memory above %uGB ignored, pae %d "
 		    "above4g_allow %d above24g_allow %d\n",
 		    (u_int)(ign / 1024), (u_int)(lim >> 30), pae_mode,
 		    above4g_allow, above24g_allow);
 	}
 
 	/*
 	 * Find insertion point while checking for overlap.  Start off by
 	 * assuming the new entry will be added to the end.
 	 */
 	insert_idx = physmap_idx + 2;
 	for (i = 0; i <= physmap_idx; i += 2) {
 		if (base < physmap[i + 1]) {
 			if (base + length <= physmap[i]) {
 				insert_idx = i;
 				break;
 			}
 			if (boothowto & RB_VERBOSE)
 				printf(
 		    "Overlapping memory regions, ignoring second region\n");
 			return (1);
 		}
 	}
 
 	/* See if we can prepend to the next entry. */
 	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
 		physmap[insert_idx] = base;
 		return (1);
 	}
 
 	/* See if we can append to the previous entry. */
 	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
 		physmap[insert_idx - 1] += length;
 		return (1);
 	}
 
 	/*
 	 * A new pair is required.  Check for room before publishing the new
 	 * index: callers keep using physmap[*physmap_idxp + 1] after a
 	 * failure, so the index must never point past the array.
 	 */
 	if (physmap_idx + 2 >= PHYS_AVAIL_ENTRIES) {
 		printf(
 		"Too many segments in the physical address map, giving up\n");
 		return (0);
 	}
 	physmap_idx += 2;
 	*physmap_idxp = physmap_idx;
 
 	/*
 	 * Move the last 'N' entries down to make room for the new
 	 * entry if needed.
 	 */
 	for (i = physmap_idx; i > insert_idx; i -= 2) {
 		physmap[i] = physmap[i - 2];
 		physmap[i + 1] = physmap[i - 1];
 	}
 
 	/* Insert the new entry. */
 	physmap[insert_idx] = base;
 	physmap[insert_idx + 1] = base + length;
 	return (1);
 }
 
 /*
  * Feed one BIOS SMAP record to add_physmap_entry().
  *
  * The record is logged when booting verbosely.  Records whose type is
  * not SMAP_TYPE_MEMORY are skipped and reported as success (1);
  * otherwise the result of add_physmap_entry() is returned.
  */
 static int
 add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
 {
 	int verbose;
 
 	verbose = (boothowto & RB_VERBOSE) != 0;
 	if (verbose) {
 		printf("SMAP type=%02x base=%016llx len=%016llx\n",
 		    smap->type, smap->base, smap->length);
 	}
 
 	if (smap->type == SMAP_TYPE_MEMORY) {
 		return (add_physmap_entry(smap->base, smap->length, physmap,
 		    physmap_idxp));
 	}
 	return (1);
 }
 
 /*
  * Walk a loader-supplied SMAP table and add every record to physmap,
  * stopping early as soon as add_smap_entry() returns 0.
  */
 static void
 add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
     int *physmap_idxp)
 {
 	struct bios_smap *entry, *limit;
 	u_int32_t nbytes;
 
 	/*
 	 * Memory map from INT 15:E820.
 	 *
 	 * subr_module.c says:
 	 * "Consumer may safely assume that size value precedes data."
 	 * ie: an int32_t immediately precedes SMAP.
 	 */
 	nbytes = ((u_int32_t *)smapbase)[-1];
 	limit = (struct bios_smap *)((uintptr_t)smapbase + nbytes);
 
 	entry = smapbase;
 	while (entry < limit &&
 	    add_smap_entry(entry, physmap, physmap_idxp))
 		entry++;
 }
 
 /*
  * Clamp the global basemem (in KB) to at most 640 and hand the result
  * to pmap_basemem_setup().  A larger value is reported before clamping.
  */
 static void
 basemem_setup(void)
 {
 
 	/* The common case: nothing to fix up. */
 	if (basemem <= 640) {
 		pmap_basemem_setup(basemem);
 		return;
 	}
 
 	printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 	    basemem);
 	basemem = 640;
 	pmap_basemem_setup(basemem);
 }
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  *
  * XXX first should be vm_paddr_t.
  */
 static void
 getmemsize(int first)
 {
 	int has_smap, off, physmap_idx, pa_indx, da_indx;
 	u_long memtest;
 	vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
 	quad_t dcons_addr, dcons_size, physmem_tunable;
 	int hasbrokenint12, i, res;
 	u_int extmem;
 	struct vm86frame vmf;
 	struct vm86context vmc;
 	vm_paddr_t pa;
 	struct bios_smap *smap, *smapbase;
 	caddr_t kmdp;
 
 	/*
 	 * Overview of the phases below:
 	 *  1. Build physmap from the loader's SMAP, or failing that from
 	 *     INT 15:E820 via vm86, or failing that from E801/RTC.
 	 *  2. Derive Maxmem and apply the MAXMEM / hw.physmem overrides.
 	 *  3. Call pmap_bootstrap(), then walk physmap page by page to
 	 *     fill phys_avail[] and dump_avail[] (optionally testing
 	 *     each page).
 	 *  4. Reserve and map the message buffer at the end of the last
 	 *     phys_avail chunk.
 	 */
 	has_smap = 0;
 	bzero(&vmf, sizeof(vmf));
 	bzero(physmap, sizeof(physmap));
 	basemem = 0;
 
 	/*
 	 * Tell the physical memory allocator about pages used to store
 	 * the kernel and preloaded data.  See kmem_bootstrap_free().
 	 */
 	vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
 
 	TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow);
 	TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow);
 
 	/*
 	 * Check if the loader supplied an SMAP memory map.  If so,
 	 * use that and do not make any VM86 calls.
 	 */
 	physmap_idx = 0;
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp == NULL)
 		kmdp = preload_search_by_type("elf32 kernel");
 	smapbase = (struct bios_smap *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_SMAP);
 	if (smapbase != NULL) {
 		add_smap_entries(smapbase, physmap, &physmap_idx);
 		has_smap = 1;
 		goto have_smap;
 	}
 
 	/*
 	 * Some newer BIOSes have a broken INT 12H implementation
 	 * which causes a kernel panic immediately.  In this case, we
 	 * need use the SMAP to determine the base memory size.
 	 */
 	hasbrokenint12 = 0;
 	TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
 	if (hasbrokenint12 == 0) {
 		/* Use INT12 to determine base memory size. */
 		vm86_intcall(0x12, &vmf);
 		basemem = vmf.vmf_ax;
 		basemem_setup();
 	}
 
 	/*
 	 * Fetch the memory map with INT 15:E820.  Map page 1 R/W into
 	 * the kernel page table so we can use it as a buffer.  The
 	 * kernel will unmap this page later.
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1));
 	res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 	KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
 
 	/*
 	 * Iterate the BIOS call: ebx starts at 0 and the loop ends when the
 	 * call fails, the signature is not echoed back in eax, physmap is
 	 * full, or ebx comes back as 0.
 	 */
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		has_smap = 1;
 		if (!add_smap_entry(smap, physmap, &physmap_idx))
 			break;
 	} while (vmf.vmf_ebx != 0);
 
 have_smap:
 	/*
 	 * If we didn't fetch the "base memory" size from INT12,
 	 * figure it out from the SMAP (or just guess).
 	 */
 	if (basemem == 0) {
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (physmap[i] == 0x00000000) {
 				basemem = physmap[i + 1] / 1024;
 				break;
 			}
 		}
 
 		/* XXX: If we couldn't find basemem from SMAP, just guess. */
 		if (basemem == 0)
 			basemem = 640;
 		basemem_setup();
 	}
 
 	/*
 	 * physmap was zeroed on entry, so a non-zero end for the first pair
 	 * means the SMAP path filled it in and no fallback is needed.
 	 */
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed to find an SMAP, figure out the extended
 	 * memory size.  We will then build a simple memory map with
 	 * two segments, one for "base memory" and the second for
 	 * "extended memory".  Note that "extended memory" starts at a
 	 * physical address of 1MB and that both basemem and extmem
 	 * are in units of 1KB.
 	 *
 	 * First, try to fetch the extended memory size via INT 15:E801.
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 		/*
 		 * If INT15:E801 fails, this is our last ditch effort
 		 * to determine the extended memory size.  Currently
 		 * we prefer the RTC value over INT15:88.
 		 */
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 
 	/* Two-segment fallback map: [0, basemem) and [1MB, 1MB + extmem). */
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 physmap_done:
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	alloc_ap_trampoline(physmap, &physmap_idx);
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 *
 	 * This is especially confusing when it is much larger than the
 	 * memory size and is displayed as "realmem".
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	/*
 	 * NOTE(review): the division by 4 presumably converts a value in KB
 	 * to 4K pages (the verbose message below prints Maxmem * 4 as "K")
 	 * -- confirm.
 	 */
 	Maxmem = MAXMEM / 4;
 #endif
 
 	if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable))
 		Maxmem = atop(physmem_tunable);
 
 	/*
 	 * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
 	 * the amount of memory in the system.
 	 */
 	if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
 		Maxmem = atop(physmap[physmap_idx + 1]);
 
 	/*
 	 * The boot memory test is disabled by default, as it takes a
 	 * significant amount of time on large-memory systems, and is
 	 * unfriendly to virtual machines as it unnecessarily touches all
 	 * pages.
 	 *
 	 * A general name is used as the code may be extended to support
 	 * additional tests beyond the current "page present" test.
 	 */
 	memtest = 0;
 	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %ldK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	da_indx = 1;
 	/*
 	 * Seed the first phys_avail pair and the first dump_avail end with
 	 * PAGE_SIZE so the "continuation" tests in the loop below match the
 	 * first page visited.
 	 */
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 	dump_avail[da_indx] = physmap[0];
 
 	/*
 	 * Get dcons buffer address
 	 */
 	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 	    getenv_quad("dcons.size", &dcons_size) == 0)
 		dcons_addr = 0;
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_paddr_t end;
 
 		/* end = min(Maxmem limit, page-truncated segment end). */
 		end = ptoa((vm_paddr_t)Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad, full;
 			int *ptr;
 
 			/*
 			 * full is set once phys_avail[] runs out of slots;
 			 * it ends this inner loop after dump_avail[] has
 			 * been updated for the current page.
 			 */
 			full = FALSE;
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= KERNLOAD && pa < first)
 				goto do_dump_avail;
 
 			/*
 			 * block out dcons buffer
 			 */
 			if (dcons_addr > 0
 			    && pa >= trunc_page(dcons_addr)
 			    && pa < dcons_addr + dcons_size)
 				goto do_dump_avail;
 
 			page_bad = FALSE;
 			if (memtest == 0)
 				goto skip_memtest;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 			ptr = (int *)pmap_cmap3(pa, PG_V | PG_RW | PG_N);
 
 			/* Save the first word; it is restored after testing. */
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa)
 				page_bad = TRUE;
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555)
 				page_bad = TRUE;
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff)
 				page_bad = TRUE;
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0)
 				page_bad = TRUE;
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 skip_memtest:
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE)
 				continue;
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ENTRIES) {
 					printf(
 		"Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					full = TRUE;
 					goto do_dump_avail;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 			}
 			physmem++;
 do_dump_avail:
 			/*
 			 * dump_avail[] is maintained the same way as
 			 * phys_avail[], but also covers the kernel and dcons
 			 * pages skipped above; when it is full the page is
 			 * silently left out.
 			 */
 			if (dump_avail[da_indx] == pa) {
 				dump_avail[da_indx] += PAGE_SIZE;
 			} else {
 				da_indx++;
 				if (da_indx == PHYS_AVAIL_ENTRIES) {
 					da_indx--;
 					goto do_next;
 				}
 				dump_avail[da_indx++] = pa;	/* start */
 				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 			}
 do_next:
 			if (full)
 				break;
 		}
 	}
 	/*
 	 * NOTE(review): presumably clears the temporary mapping used by the
 	 * memory test -- confirm against pmap_cmap3().
 	 */
 	pmap_cmap3(0, 0);
 	
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
 		/* Drop the too-small last chunk and un-count its pages. */
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(msgbufsize);
 
 	/* Map the message buffer. */
 	for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
 		    off);
 }
 
 /*
  * Bring up the kernel debugger framework.  Called from init386() either
  * right after cninit() or, with late console initialization, after the
  * console has been set up.
  */
 static void
 i386_kdb_init(void)
 {
 #ifdef DDB
 	/* Pass the symbol table bounds recorded in bootinfo to DDB. */
 	db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
 #endif
 	kdb_init();
 #ifdef KDB
 	/* Drop into the debugger when RB_KDB is set in boothowto. */
 	if (boothowto & RB_KDB)
 		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 #endif
 }
 
 static void
 fixup_idt(void)
 {
 	struct gate_descriptor *ip;
 	uintptr_t off;
 	int x;
 
 	for (x = 0; x < NIDT; x++) {
 		ip = &idt[x];
 		if (ip->gd_type != SDT_SYS386IGT &&
 		    ip->gd_type != SDT_SYS386TGT)
 			continue;
 		off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16);
 		KASSERT(off >= (uintptr_t)start_exceptions &&
 		    off < (uintptr_t)end_exceptions,
 		    ("IDT[%d] type %d off %#x", x, ip->gd_type, off));
 		off += setidt_disp;
 		MPASS(off >= PMAP_TRM_MIN_ADDRESS &&
 		    off < PMAP_TRM_MAX_ADDRESS);
 		ip->gd_looffset = off;
 		ip->gd_hioffset = off >> 16;
 	}
 }
 
 static void
 i386_setidt1(void)
 {
 	int x;
 
 	/* exceptions */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL,
 	    SEL_KPL));
 	setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT,
 	    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall),
 	    SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 #ifdef KDTRACE_HOOKS
 	setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret),
 	    SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif
 #ifdef XENHVM
 	setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall),
 	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif
 }
 
 /*
  * Install the invalid-opcode (IDT_UD) and general-protection (IDT_GP)
  * gates again, with the same handlers and attributes i386_setidt1()
  * uses.  init386() calls this right after finishidentcpu().
  */
 static void
 i386_setidt2(void)
 {
 	int kcode;
 
 	kcode = GSEL(GCODE_SEL, SEL_KPL);
 	setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, kcode);
 	setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, kcode);
 }
 
 #if defined(DEV_ISA) && !defined(DEV_ATPIC)
 /*
  * Route vectors IDT_IO_INTS + 7 and IDT_IO_INTS + 15 to the spuriousint
  * handler.  Only built when DEV_ISA is configured without DEV_ATPIC.
  */
 static void
 i386_setidt3(void)
 {
 	int kcode;
 
 	kcode = GSEL(GCODE_SEL, SEL_KPL);
 	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT,
 	    SEL_KPL, kcode);
 	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT,
 	    SEL_KPL, kcode);
 }
 #endif
 
 /*
  * Early machine-dependent initialization for the boot processor.
  *
  * 'first' is the first free physical address after the kernel and
  * preloaded data; it is advanced here past any loaded microcode and the
  * dynamic per-CPU area before being handed to getmemsize().
  *
  * In order: links proc0/thread0, locates loader metadata and the kernel
  * environment, builds and loads the GDT, sets up per-CPU data, mutexes
  * and the IDT, initializes the clock and CPU, prepares the TSS and PIC
  * for vm86 calls, sizes memory, initializes the console/debugger and
  * message buffer, and finishes thread0's pcb.
  *
  * Returns the address of thread0's pcb, which locore uses as the
  * location of the kernel stack.
  */
 register_t
 init386(int first)
 {
 	struct region_descriptor r_gdt, r_idt;	/* table descriptors */
 	int gsel_tss, metadata_missing, x, pa;
 	struct pcpu *pc;
 	struct xstate_hdr *xhdr;
 	caddr_t kmdp;
 	vm_offset_t addend;
 	size_t ucode_len;
 	int late_console;
 
 	thread0.td_kstack = proc0kstack;
 	thread0.td_kstack_pages = TD0_KSTACK_PAGES;
 
 	/*
  	 * This may be done better later if it gets more high level
  	 * components in it. If so just link td->td_proc here.
 	 */
 	proc_linkup0(&proc0, &thread0);
 
 	/*
 	 * Loader-provided pointers below KERNBASE are offset by
 	 * PMAP_MAP_LOW before use; pointers at or above KERNBASE are used
 	 * as-is.
 	 */
 	if (bootinfo.bi_modulep) {
 		metadata_missing = 0;
 		addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ?
 		    PMAP_MAP_LOW : 0;
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + addend;
 		preload_bootstrap_relocate(addend);
 	} else {
 		metadata_missing = 1;
 	}
 
 	if (bootinfo.bi_envp != 0) {
 		addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ?
 		    PMAP_MAP_LOW : 0;
 		init_static_kenv((char *)bootinfo.bi_envp + addend, 0);
 	} else {
 		init_static_kenv(NULL, 0);
 	}
 
 	/*
 	 * Re-evaluate CPU features if we loaded a microcode update.
 	 */
 	ucode_len = ucode_load_bsp(first);
 	if (ucode_len != 0) {
 		identify_cpu();
 		/* Skip past the microcode, keeping 'first' page aligned. */
 		first = roundup2(first + ucode_len, PAGE_SIZE);
 	}
 
 	identify_hypervisor();
 
 	/* Init basic tunables, hz etc */
 	init_param1();
 
 	/*
 	 * Make gdt memory segments.  All segments cover the full 4GB
 	 * of address space and permissions are enforced at page level.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);
 
 	/* The private segment is based at CPU 0's pcpu structure. */
 	pc = &__pcpu[0];
 	gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
 	gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0;
 
 	/* Convert the software descriptors into gdt0 and load it. */
 	for (x = 0; x < NGDT; x++)
 		ssdtosd(&gdt_segs[x], &gdt0[x].sd);
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1;
 	r_gdt.rd_base =  (int)gdt0;
 	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
 	lgdt(&r_gdt);
 
 	pcpu_init(pc, 0, sizeof(struct pcpu));
 	/*
 	 * Enter 1:1 mappings for the DPCPU_SIZE bytes at 'first', use them
 	 * as CPU 0's dynamic per-CPU area, and advance 'first' past them.
 	 */
 	for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
 		pmap_kenter(pa, pa);
 	dpcpu_init((void *)first, 0);
 	first += DPCPU_SIZE;
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
 	/* Non-late cninit() and printf() can be moved up to here. */
 
 	/*
 	 * Initialize mutexes.
 	 *
 	 * icu_lock: in order to allow an interrupt to occur in a critical
 	 * 	     section, to set pcpu->ipending (etc...) properly, we
 	 *	     must be able to get the icu lock, so it can't be
 	 *	     under witness.
 	 */
 	mutex_init();
 	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
 
 	i386_setidt1();
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * Initialize the clock before the console so that console
 	 * initialization can use DELAY().
 	 */
 	clock_init();
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	i386_setidt2();
 	pmap_set_nx();
 	initializecpu();	/* Initialize CPU registers */
 	initializecpucache();
 
 	/* pointer to selector slot for %fs/%gs */
 	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 
 	/* Initialize the tss (except for the final esp0) early for vm86. */
 	common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages *
 	    PAGE_SIZE - VM86_STACK_SPACE;
 	common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
 	common_tss0.tss_ioopt = sizeof(struct i386tss) << 16;
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	/* Initialize the PIC early for vm86 calls. */
 #ifdef DEV_ISA
 #ifdef DEV_ATPIC
 	elcr_probe();
 	atpic_startup();
 #else
 	/* Reset and mask the atpics and leave them shut down. */
 	atpic_reset();
 
 	/*
 	 * Point the ICU spurious interrupt vectors at the APIC spurious
 	 * interrupt handler.
 	 */
 	i386_setidt3();
 #endif
 #endif
 
 	/*
 	 * The console and kdb should be initialized even earlier than here,
 	 * but some console drivers don't work until after getmemsize().
 	 * Default to late console initialization to support these drivers.
 	 * This loses mainly printf()s in getmemsize() and early debugging.
 	 */
 	late_console = 1;
 	TUNABLE_INT_FETCH("debug.late_console", &late_console);
 	if (!late_console) {
 		cninit();
 		i386_kdb_init();
 	}
 
 	/*
 	 * NOTE(review): unlike getmemsize() and smap_sysctl_handler(),
 	 * there is no fallback to "elf32 kernel" when this lookup returns
 	 * NULL -- confirm that is intended.
 	 */
 	kmdp = preload_search_by_type("elf kernel");
 	link_elf_ireloc(kmdp);
 
 	vm86_initialize();
 	getmemsize(first);
 	init_param2(physmem);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	if (late_console)
 		cninit();
 
 	/* Deferred until here so the warning is printed after cninit(). */
 	if (metadata_missing)
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 	if (late_console)
 		i386_kdb_init();
 
 	msgbufinit(msgbufp, msgbufsize);
 	npxinit(true);
 	/*
 	 * Set up thread0 pcb after npxinit calculated pcb + fpu save
 	 * area size.  Zero out the extended state header in fpu save
 	 * area.
 	 */
 	thread0.td_pcb = get_pcb_td(&thread0);
 	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
 	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
 	if (use_xsave) {
 		/* The xstate header directly follows the user save area. */
 		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
 		    1);
 		xhdr->xstate_bv = xsave_mask;
 	}
 	PCPU_SET(curpcb, thread0.td_pcb);
 	/* Move esp0 in the tss to its final place. */
 	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
 	/*
 	 * NOTE(review): the code subtracts VM86_STACK_SPACE, not a literal
 	 * 16 -- the note above may predate that constant; confirm.
 	 */
 	common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE;
 	PCPU_SET(kesp0, common_tss0.tss_esp0);
 	gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;	/* clear busy bit */
 	ltr(gsel_tss);
 
 	/* transfer to user mode */
 
 	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
 	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
 	thread0.td_pcb->pcb_cr3 = pmap_get_kcr3();
 	thread0.td_pcb->pcb_ext = 0;
 	thread0.td_frame = &proc0_tf;
 
 	cpu_probe_amdc1e();
 
 #ifdef FDT
 	x86_init_fdt();
 #endif
 
 	/* Location of kernel stack for locore */
 	return ((register_t)thread0.td_pcb);
 }
 
 /*
  * Move the descriptor tables and exception entry code into memory
  * obtained from pmap_trm_alloc().  Registered below as a SYSINIT at
  * SI_SUB_VM / SI_ORDER_SECOND.
  *
  * Relocated or created here: the GDT (sized for mp_ncpus CPUs), the TSS
  * array, a copy of the code between start_exceptions and end_exceptions,
  * the trampoline stack, the IDT, the double-fault TSS and stack, the LDT
  * and the copyout buffer.
  *
  * NOTE(review): none of the pmap_trm_alloc(..., M_NOWAIT) results are
  * checked for NULL -- presumably allocation cannot fail this early;
  * confirm.
  */
 static void
 machdep_init_trampoline(void)
 {
 	struct region_descriptor r_gdt, r_idt;
 	struct i386tss *tss;
 	char *copyout_buf, *trampoline, *tramp_stack_base;
 	int x;
 
 	/* New GDT: room for every CPU, seeded with CPU 0's gdt0. */
 	gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus,
 	    M_NOWAIT | M_ZERO);
 	bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT);
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int)gdt;
 	lgdt(&r_gdt);
 
 	/* New TSS array; slot 0 starts as a copy of common_tss0. */
 	tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus,
 	    M_NOWAIT | M_ZERO);
 	bcopy(&common_tss0, tss, sizeof(struct i386tss));
 	gdt[GPROC0_SEL].sd.sd_lobase = (int)tss;
 	gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24;
 	gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 
 	/* Repoint the per-CPU descriptor pointers at the new GDT/TSS. */
 	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	PCPU_SET(common_tssp, tss);
 	ltr(GSEL(GPROC0_SEL, SEL_KPL));
 
 	/* Copy the exception entry code and set up its stack. */
 	trampoline = pmap_trm_alloc(end_exceptions - start_exceptions,
 	    M_NOWAIT);
 	bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions);
 	tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
 	PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ -
 	    VM86_STACK_SPACE);
 	tss[0].tss_esp0 = PCPU_GET(trampstk);
 
 	idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO);
 	bcopy(idt0, idt, sizeof(idt0));
 
 	/* Re-initialize new IDT since the handlers were relocated */
 	setidt_disp = trampoline - start_exceptions;
 	fixup_idt();
 
 	r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
 	r_idt.rd_base = (int)idt;
 	lidt(&r_idt);
 
 	/* dblfault TSS */
 	dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO);
 	dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT);
 	/* Every stack pointer in the TSS starts at the top of the page. */
 	dblfault_tss->tss_esp = dblfault_tss->tss_esp0 =
 	    dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 =
 	    (int)dblfault_stack + PAGE_SIZE;
 	dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
 	    dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss->tss_cr3 = pmap_get_kcr3();
 	dblfault_tss->tss_eip = (int)dblfault_handler;
 	dblfault_tss->tss_eflags = PSL_KERNEL;
 	dblfault_tss->tss_ds = dblfault_tss->tss_es =
 	    dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss;
 	gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24;
 
 	/* make ldt memory segments */
 	ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT,
 	    M_NOWAIT | M_ZERO);
 	gdt[GLDT_SEL].sd.sd_lobase = (int)ldt;
 	gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24;
 	ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
 	ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
 	for (x = 0; x < nitems(ldt_segs); x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
 	PCPU_SET(copyout_buf, copyout_buf);
 	copyout_init_tramp();
 }
 SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL);
 
 #ifdef COMPAT_43
 static void
 i386_setup_lcall_gate(void)
 {
 	struct sysentvec *sv;
 	struct user_segment_descriptor desc;
 	u_int lcall_addr;
 
 	sv = &elf32_freebsd_sysvec;
 	lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp;
 
 	bzero(&desc, sizeof(desc));
 	desc.sd_type = SDT_MEMERA;
 	desc.sd_dpl = SEL_UPL;
 	desc.sd_p = 1;
 	desc.sd_def32 = 1;
 	desc.sd_gran = 1;
 	desc.sd_lolimit = 0xffff;
 	desc.sd_hilimit = 0xf;
 	desc.sd_lobase = lcall_addr;
 	desc.sd_hibase = lcall_addr >> 24;
 	bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc));
 }
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL);
 #endif
 
 /*
  * Machine-dependent per-CPU initialization hook.  Sets pc_acpi_id to
  * 0xffffffff; the cpuid and size arguments are not used here.
  * NOTE(review): 0xffffffff presumably marks the ACPI id as not yet
  * known -- confirm.
  */
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 {
 
 	pcpu->pc_acpi_id = 0xffffffff;
 }
 
 /*
  * Sysctl handler for machdep.smap: copies out the loader-provided BIOS
  * SMAP as an array of struct bios_smap_xattr.
  *
  * Each record carries base, length and type from the SMAP table.  xattr
  * comes from the parallel MODINFOMD_SMAP_XATTR table when present and
  * is 0 otherwise.
  *
  * Returns 0 when no SMAP metadata exists (nothing is copied out) or on
  * success; otherwise the first error reported by SYSCTL_OUT().
  */
 static int
 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct bios_smap *smapbase;
 	struct bios_smap_xattr smap;
 	caddr_t kmdp;
 	uint32_t *smapattr;
 	int count, error, i;
 
 	/* Retrieve the system memory map from the loader. */
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp == NULL)
 		kmdp = preload_search_by_type("elf32 kernel");
 	smapbase = (struct bios_smap *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_SMAP);
 	if (smapbase == NULL)
 		return (0);
 	smapattr = (uint32_t *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 	/* The table's size in bytes is stored just before the data. */
 	count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
 	error = 0;
 	for (i = 0; i < count; i++) {
 		smap.base = smapbase[i].base;
 		smap.length = smapbase[i].length;
 		smap.type = smapbase[i].type;
 		if (smapattr != NULL)
 			smap.xattr = smapattr[i];
 		else
 			smap.xattr = 0;
 		error = SYSCTL_OUT(req, &smap, sizeof(smap));
 		/*
 		 * Stop at the first failure so a later successful copy
 		 * cannot mask it.
 		 */
 		if (error != 0)
 			break;
 	}
 	return (error);
 }
 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 
 /*
  * Enter a spinlock section on the current thread.  The outermost entry
  * disables interrupts, records the previous flags in md_saved_flags and
  * enters a critical section; nested entries only bump the count.
  */
 void
 spinlock_enter(void)
 {
 	struct thread *self;
 	register_t saved;
 
 	self = curthread;
 
 	/* Nested entry: just count it. */
 	if (self->td_md.md_spinlock_count != 0) {
 		self->td_md.md_spinlock_count++;
 		return;
 	}
 
 	saved = intr_disable();
 	self->td_md.md_spinlock_count = 1;
 	self->td_md.md_saved_flags = saved;
 	critical_enter();
 }
 
 /*
  * Leave a spinlock section on the current thread.  When the count drops
  * to zero the critical section is exited and the interrupt flags saved
  * by spinlock_enter() are restored.
  */
 void
 spinlock_exit(void)
 {
 	struct thread *self;
 	register_t saved;
 
 	self = curthread;
 	/* Fetch the saved flags before touching the count. */
 	saved = self->td_md.md_saved_flags;
 	self->td_md.md_spinlock_count--;
 	if (self->td_md.md_spinlock_count != 0)
 		return;
 
 	critical_exit();
 	intr_restore(saved);
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 
 /*
  * Install the Pentium F00F workaround when has_f00f_bug is set.
  *
  * The IDT is copied into freshly allocated memory, positioned so that
  * its first seven entries end exactly at a page boundary, and that
  * lower page is then made read-only.  Does nothing when has_f00f_bug
  * is clear.
  */
 static void
 f00f_hack(void *unused)
 {
 	struct region_descriptor r_idt;
 	struct gate_descriptor *new_idt;
 	vm_offset_t tmp;
 
 	if (!has_f00f_bug)
 		return;
 
 	GIANT_REQUIRED;
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
 	/*
 	 * Allocate three pages so that, after rounding up to a page
 	 * boundary, the straddling IDT still fits.
 	 */
 	tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO);
 	if (tmp == 0)
 		panic("pmap_trm_alloc returned 0");
 	tmp = round_page(tmp);
 
 	/* Put the problematic entry (#6) at the end of the lower page. */
 	new_idt = (struct gate_descriptor *)
 	    (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
 	bcopy(idt, new_idt, sizeof(idt0));
 	r_idt.rd_base = (u_int)new_idt;
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	lidt(&r_idt);
 	/* SMP machines do not need the F00F hack. */
 	idt = new_idt;
 	/* Write-protect the page holding entries 0-6. */
 	pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 /*
  * Build a PCB out of a trapframe.
  *
  * kdb_trap() calls this when a backtrace should start from the function
  * that caused entry into the debugger: the context lives in the trapframe,
  * but the trace is based on the PCB.  The PCB does not need to be perfect;
  * it only has to hold enough state for a backtrace.
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 
 	/* Instruction and frame pointers, then the other copied registers. */
 	pcb->pcb_eip = tf->tf_eip;
 	pcb->pcb_ebp = tf->tf_ebp;
 	pcb->pcb_ebx = tf->tf_ebx;
 	pcb->pcb_esi = tf->tf_esi;
 	pcb->pcb_edi = tf->tf_edi;
 
 	/*
 	 * When ISPL(tf_cs) is non-zero the frame's own tf_esp is used;
 	 * otherwise the stack pointer is derived from the address just past
 	 * the trapframe, minus 8.
 	 */
 	if (ISPL(tf->tf_cs))
 		pcb->pcb_esp = tf->tf_esp;
 	else
 		pcb->pcb_esp = (int)(tf + 1) - 8;
 
 	/* %gs is taken from the live register, not from the frame. */
 	pcb->pcb_gs = rgs();
 }
 
 /*
  * Set the saved instruction pointer (tf_eip) of thread td to addr.
  * Always returns 0.
  */
 int
 ptrace_set_pc(struct thread *td, u_long addr)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	frame->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(struct thread *td)
 {
 
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 	if ((td->td_frame->tf_eflags & PSL_T) == 0) {
 		td->td_frame->tf_eflags |= PSL_T;
 		td->td_dbgflags |= TDB_STEP;
 	}
 	return (0);
 }
 
 int
 ptrace_clear_single_step(struct thread *td)
 {
 
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 	td->td_frame->tf_eflags &= ~PSL_T;
 	td->td_dbgflags &= ~TDB_STEP;
 	return (0);
 }
 
 /*
  * Fill *regs from thread td: r_gs comes from the PCB, everything else is
  * filled in by fill_frame_regs() from the thread's trapframe.  Returns
  * whatever fill_frame_regs() returns.
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 
 	regs->r_gs = td->td_pcb->pcb_gs;
 	return (fill_frame_regs(td->td_frame, regs));
 }
 
 /*
  * Copy the register values held in trapframe *tp into *regs.  r_err and
  * r_trapno are reported as 0; r_gs is not touched here.  Always returns 0.
  */
 int
 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 {
 
 	/* Segment registers. */
 	regs->r_cs = tp->tf_cs;
 	regs->r_ss = tp->tf_ss;
 	regs->r_ds = tp->tf_ds;
 	regs->r_es = tp->tf_es;
 	regs->r_fs = tp->tf_fs;
 
 	/* General-purpose registers. */
 	regs->r_eax = tp->tf_eax;
 	regs->r_ebx = tp->tf_ebx;
 	regs->r_ecx = tp->tf_ecx;
 	regs->r_edx = tp->tf_edx;
 	regs->r_esi = tp->tf_esi;
 	regs->r_edi = tp->tf_edi;
 	regs->r_ebp = tp->tf_ebp;
 	regs->r_esp = tp->tf_esp;
 
 	/* Instruction pointer and flags. */
 	regs->r_eip = tp->tf_eip;
 	regs->r_eflags = tp->tf_eflags;
 
 	/* Not taken from the frame. */
 	regs->r_trapno = 0;
 	regs->r_err = 0;
 	return (0);
 }
 
 /*
  * Load thread td's trapframe and PCB %gs from *regs.
  *
  * The request is rejected with EINVAL, before anything is modified, when
  * the new eflags fail EFL_SECURE() against the current eflags or the new
  * %cs fails CS_SECURE().  Returns 0 on success.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	if (!EFL_SECURE(regs->r_eflags, frame->tf_eflags))
 		return (EINVAL);
 	if (!CS_SECURE(regs->r_cs))
 		return (EINVAL);
 
 	/* Segment registers; %gs lives in the PCB rather than the frame. */
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ss = regs->r_ss;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_es = regs->r_es;
 	frame->tf_fs = regs->r_fs;
 	td->td_pcb->pcb_gs = regs->r_gs;
 
 	/* General-purpose registers. */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esp = regs->r_esp;
 
 	/* Instruction pointer and flags. */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 	return (0);
 }
 
 /*
  * Export thread td's user FPU save area into *fpregs.
  *
  * td must be the current thread, suspended, or belong to a stopping
  * process (asserted).  After npxgetregs(td), the save area is converted
  * with npx_fill_fpregs_xmm() when cpu_fxsr is set, and copied verbatim
  * from its sv_87 member otherwise.  Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 
 	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 	    P_SHOULDSTOP(td->td_proc),
 	    ("not suspended thread %p", td));
 	npxgetregs(td);
 	if (!cpu_fxsr) {
 		bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
 		    sizeof(*fpregs));
 		return (0);
 	}
 	npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
 	    (struct save87 *)fpregs);
 	return (0);
 }
 
 /*
  * Import *fpregs into thread td's user FPU save area.
  *
  * Inside a critical section the registers are converted with
  * npx_set_fpregs_xmm() when cpu_fxsr is set, or copied verbatim into the
  * sv_87 member otherwise; npxuserinited(td) is then called.  Always
  * returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 
 	critical_enter();
 	if (!cpu_fxsr) {
 		bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
 		    sizeof(*fpregs));
 	} else {
 		npx_set_fpregs_xmm((struct save87 *)fpregs,
 		    &get_pcb_user_save_td(td)->sv_xmm);
 	}
 	npxuserinited(td);
 	critical_exit();
 	return (0);
 }
 
 /*
  * Get machine context.
  *
  * Fills *mcp from thread td's trapframe, PCB and FPU state.  When
  * GET_MC_CLEAR_RET is set in flags, eax and edx are reported as 0 and
  * PSL_C is cleared in the reported eflags.  No extended FPU state is
  * attached (mc_flags, mc_xfpustate and mc_xfpustate_len end up 0).
  * Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 {
 	struct segment_descriptor *sdp;
 	struct trapframe *tp;
 	int clear_ret;
 
 	tp = td->td_frame;
 	clear_ret = (flags & GET_MC_CLEAR_RET) != 0;
 
 	PROC_LOCK(curthread->td_proc);
 	mcp->mc_onstack = sigonstack(tp->tf_esp);
 	PROC_UNLOCK(curthread->td_proc);
 
 	/* Segment registers; %gs comes from the PCB. */
 	mcp->mc_gs = td->td_pcb->pcb_gs;
 	mcp->mc_fs = tp->tf_fs;
 	mcp->mc_es = tp->tf_es;
 	mcp->mc_ds = tp->tf_ds;
 	mcp->mc_cs = tp->tf_cs;
 	mcp->mc_ss = tp->tf_ss;
 
 	/* General-purpose registers. */
 	mcp->mc_edi = tp->tf_edi;
 	mcp->mc_esi = tp->tf_esi;
 	mcp->mc_ebp = tp->tf_ebp;
 	mcp->mc_isp = tp->tf_isp;
 	mcp->mc_ebx = tp->tf_ebx;
 	mcp->mc_ecx = tp->tf_ecx;
 	mcp->mc_esp = tp->tf_esp;
 	mcp->mc_eip = tp->tf_eip;
 
 	/* Return-value registers and carry flag, optionally cleared. */
 	mcp->mc_eax = clear_ret ? 0 : tp->tf_eax;
 	mcp->mc_edx = clear_ret ? 0 : tp->tf_edx;
 	mcp->mc_eflags = tp->tf_eflags;
 	if (clear_ret)
 		mcp->mc_eflags &= ~PSL_C;
 
 	mcp->mc_len = sizeof(*mcp);
 
 	/* Base FPU state only: no extended-state buffer is supplied. */
 	get_fpcontext(td, mcp, NULL, 0);
 
 	/* %fs and %gs bases, reassembled from the PCB's descriptors. */
 	sdp = &td->td_pcb->pcb_fsd;
 	mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 	sdp = &td->td_pcb->pcb_gsd;
 	mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 
 	mcp->mc_flags = 0;
 	mcp->mc_xfpustate = 0;
 	mcp->mc_xfpustate_len = 0;
 	bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * However, we don't set any but the user modifiable flags, and we won't
  * touch the cs selector.
  *
  * Returns EINVAL when mc_len or mc_flags are not acceptable or the
  * extended FPU state is too large, the error from copyin() or
  * set_fpcontext() when those fail, and 0 on success.  All of these checks
  * run before the trapframe is modified.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *tp;
 	char *xfpustate;
 	int eflags, ret;
 
 	tp = td->td_frame;
 	/* The context must have exactly our size and only known flag bits. */
 	if (mcp->mc_len != sizeof(*mcp) ||
 	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 		return (EINVAL);
 	/*
 	 * Take only the PSL_USERCHANGE bits from the supplied eflags; every
 	 * other bit keeps its current trapframe value.
 	 */
 	eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 	    (tp->tf_eflags & ~PSL_USERCHANGE);
 	if (mcp->mc_flags & _MC_HASFPXSTATE) {
 		/*
 		 * Extended FPU state is attached: bound its length, then
 		 * copyin() it from the address in mc_xfpustate into a buffer
 		 * on this function's stack.
 		 */
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(union savefpu))
 			return (EINVAL);
 		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
 		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 		    mcp->mc_xfpustate_len);
 		if (ret != 0)
 			return (ret);
 	} else
 		xfpustate = NULL;
 	/* Restore the FPU state first so a failure leaves the frame intact. */
 	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
 	if (ret != 0)
 		return (ret);
 	/* Commit the register state; tf_cs is deliberately not assigned. */
 	tp->tf_fs = mcp->mc_fs;
 	tp->tf_es = mcp->mc_es;
 	tp->tf_ds = mcp->mc_ds;
 	tp->tf_edi = mcp->mc_edi;
 	tp->tf_esi = mcp->mc_esi;
 	tp->tf_ebp = mcp->mc_ebp;
 	tp->tf_ebx = mcp->mc_ebx;
 	tp->tf_edx = mcp->mc_edx;
 	tp->tf_ecx = mcp->mc_ecx;
 	tp->tf_eax = mcp->mc_eax;
 	tp->tf_eip = mcp->mc_eip;
 	tp->tf_eflags = eflags;
 	tp->tf_esp = mcp->mc_esp;
 	tp->tf_ss = mcp->mc_ss;
 	td->td_pcb->pcb_gs = mcp->mc_gs;
 	return (0);
 }
 
 /*
  * Export thread td's FPU state into *mcp and, optionally, its extended
  * state into the caller's buffer.
  *
  * mc_ownedfp, mc_fpstate and mc_fpformat are always filled in.  When
  * use_xsave is set and xfpusave_len is non-zero, the data following the
  * base save area is copied to xfpusave, _MC_HASFPXSTATE is set in mc_flags
  * and mc_xfpustate_len records the number of bytes copied.  At most
  * cpu_max_ext_state_size - sizeof(union savefpu) bytes are copied; any
  * part of the caller's buffer beyond that is zeroed.
  */
 static void
 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
     size_t xfpusave_len)
 {
 	size_t max_len, len;
 
 	mcp->mc_ownedfp = npxgetregs(td);
 	bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 	    sizeof(mcp->mc_fpstate));
 	mcp->mc_fpformat = npxformat();
 	if (!use_xsave || xfpusave_len == 0)
 		return;
 	max_len = cpu_max_ext_state_size - sizeof(union savefpu);
 	len = xfpusave_len;
 	if (len > max_len) {
 		/*
 		 * The buffer is larger than the extended state.  Zero the
 		 * tail using the caller's length: computing it from len
 		 * after clamping would always yield 0 bytes.
 		 */
 		bzero(xfpusave + max_len, xfpusave_len - max_len);
 		len = max_len;
 	}
 	mcp->mc_flags |= _MC_HASFPXSTATE;
 	mcp->mc_xfpustate_len = len;
 	bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 }
 
 /*
  * Restore thread td's FPU state from *mcp.
  *
  * - mc_fpformat == _MC_FPFMT_NODEV: nothing to do, returns 0.
  * - mc_fpformat other than _MC_FPFMT_387 / _MC_FPFMT_XMM: EINVAL.
  * - mc_ownedfp == _MC_FPOWNED_NONE: the FPU state is dropped, returns 0.
  * - mc_ownedfp == _MC_FPOWNED_FPU / _MC_FPOWNED_PCB: the state is loaded
  *   with npxsetregs(), whose result is returned.
  * - any other mc_ownedfp: EINVAL.
  */
 static int
 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
     size_t xfpustate_len)
 {
 
 	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 		return (0);
 	if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 	    mcp->mc_fpformat != _MC_FPFMT_XMM)
 		return (EINVAL);
 
 	if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 		/* We don't care what state is left in the FPU or PCB. */
 		fpstate_drop(td);
 		return (0);
 	}
 	if (mcp->mc_ownedfp != _MC_FPOWNED_FPU &&
 	    mcp->mc_ownedfp != _MC_FPOWNED_PCB)
 		return (EINVAL);
 
 	return (npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
 	    xfpustate, xfpustate_len));
 }
 
 static void
 fpstate_drop(struct thread *td)
 {
 
 	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
 	critical_enter();
 	if (PCPU_GET(fpcurthread) == td)
 		npxdrop();
 	/*
 	 * XXX force a full drop of the npx.  The above only drops it if we
 	 * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 	 *
 	 * XXX I don't much like npxgetregs()'s semantics of doing a full
 	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
 	 * We only need to drop to !PCB_INITDONE in sendsig().  But
 	 * sendsig() is the only caller of npxgetregs()... perhaps we just
 	 * have too many layers.
 	 */
 	curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
 	    PCB_NPXUSERINITDONE);
 	critical_exit();
 }
 
 /*
  * Report debug registers dr0-dr3, dr6 and dr7 in *dbregs.
  *
  * With td == NULL the values are read from the CPU (rdr0() etc.);
  * otherwise they are the values saved in td's PCB.  dr4 and dr5 are always
  * reported as 0.  Always returns 0.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	struct pcb *saved;
 
 	dbregs->dr[4] = 0;
 	dbregs->dr[5] = 0;
 
 	if (td != NULL) {
 		/* Saved per-thread copies. */
 		saved = td->td_pcb;
 		dbregs->dr[0] = saved->pcb_dr0;
 		dbregs->dr[1] = saved->pcb_dr1;
 		dbregs->dr[2] = saved->pcb_dr2;
 		dbregs->dr[3] = saved->pcb_dr3;
 		dbregs->dr[6] = saved->pcb_dr6;
 		dbregs->dr[7] = saved->pcb_dr7;
 		return (0);
 	}
 
 	/* No thread given: read the live registers. */
 	dbregs->dr[0] = rdr0();
 	dbregs->dr[1] = rdr1();
 	dbregs->dr[2] = rdr2();
 	dbregs->dr[3] = rdr3();
 	dbregs->dr[6] = rdr6();
 	dbregs->dr[7] = rdr7();
 	return (0);
 }
 
 int
 set_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	struct pcb *pcb;
 	int i;
 
 	if (td == NULL) {
 		load_dr0(dbregs->dr[0]);
 		load_dr1(dbregs->dr[1]);
 		load_dr2(dbregs->dr[2]);
 		load_dr3(dbregs->dr[3]);
 		load_dr6(dbregs->dr[6]);
 		load_dr7(dbregs->dr[7]);
 	} else {
 		/*
 		 * Don't let an illegal value for dr7 get set.	Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP.
 		 */
 		for (i = 0; i < 4; i++) {
 			if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 				return (EINVAL);
 			if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
 				return (EINVAL);
 		}
 		
 		pcb = td->td_pcb;
 		
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 			/* dr0 is enabled */
 			if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 			
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 			/* dr1 is enabled */
 			if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 			
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 			/* dr2 is enabled */
 			if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 			
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 			/* dr3 is enabled */
 			if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		pcb->pcb_dr0 = dbregs->dr[0];
 		pcb->pcb_dr1 = dbregs->dr[1];
 		pcb->pcb_dr2 = dbregs->dr[2];
 		pcb->pcb_dr3 = dbregs->dr[3];
 		pcb->pcb_dr6 = dbregs->dr[6];
 		pcb->pcb_dr7 = dbregs->dr[7];
 
 		pcb->pcb_flags |= PCB_DBREGS;
 	}
 
 	return (0);
 }
 
 /*
  * Return > 0 (the number of breakpoints that triggered) if a hardware
  * breakpoint has been hit and at least one of the triggered breakpoint
  * addresses is in user space.  Return 0 otherwise.
  */
 int
 user_dbreg_trap(register_t dr6)
 {
 	caddr_t where[4];	/* addresses of the breakpoints that fired */
 	u_int32_t ctl;		/* current dr7 */
 	u_int32_t fired;	/* breakpoint bits extracted from dr6 */
 	int hits;		/* number of breakpoints that fired */
 	int k;
 
 	/*
 	 * If none of the breakpoint bits are set, this trap was not caused
 	 * by any of the debug registers.
 	 */
 	fired = dr6 & DBREG_DR6_BMASK;
 	if (fired == 0)
 		return (0);
 
 	/*
 	 * If all GE and LE bits in dr7 are zero, the trap cannot have been
 	 * caused by the hardware debug registers.
 	 */
 	ctl = rdr7();
 	if ((ctl & 0x000000ff) == 0)
 		return (0);
 
 	/*
 	 * At least one breakpoint was hit: collect the address of each one
 	 * that fired.
 	 */
 	hits = 0;
 	if (fired & 0x01)
 		where[hits++] = (caddr_t)rdr0();
 	if (fired & 0x02)
 		where[hits++] = (caddr_t)rdr1();
 	if (fired & 0x04)
 		where[hits++] = (caddr_t)rdr2();
 	if (fired & 0x08)
 		where[hits++] = (caddr_t)rdr3();
 
 	/* A single user-space address is enough to report the hits. */
 	for (k = 0; k < hits; k++) {
 		if (where[k] < (caddr_t)VM_MAXUSER_ADDRESS)
 			return (hits);
 	}
 
 	/* None of the breakpoints are in user space. */
 	return (0);
 }
 
 #ifdef KDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only available as
  * inline functions, thus cannot be called from the debugger.
  */
 
 /* silence compiler warnings */
 u_char inb_(u_short);
 void outb_(u_short, u_char);
 
 /* Out-of-line wrapper around inb(): returns inb(port). */
 u_char
 inb_(u_short port)
 {
 
 	return (inb(port));
 }
 
 /* Out-of-line wrapper around outb(): passes port and data straight through. */
 void
 outb_(u_short port, u_char data)
 {
 	outb(port, data);
 }
 
 #endif /* KDB */
Index: head/sys/x86/cpufreq/hwpstate.c
===================================================================
--- head/sys/x86/cpufreq/hwpstate.c	(revision 356939)
+++ head/sys/x86/cpufreq/hwpstate.c	(revision 356940)
@@ -1,539 +1,543 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2005 Nate Lawson
  * Copyright (c) 2004 Colin Percival
  * Copyright (c) 2004-2005 Bruno Durcot
  * Copyright (c) 2004 FUKUDA Nobuhiko
  * Copyright (c) 2009 Michael Reifenberger
  * Copyright (c) 2009 Norikatsu Shigemura
  * Copyright (c) 2008-2009 Gen Otsuji
  *
  * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
  * in various parts. The authors of these files are Nate Lawson,
  * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko.
  * This code contains patches by Michael Reifenberger and Norikatsu Shigemura.
  * Thank you.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted providing that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * For more info:
  * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors
  * 31116 Rev 3.20  February 04, 2009
  * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors
  * 41256 Rev 3.00 - July 07, 2008
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/pcpu.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 
 #include <machine/md_var.h>
 #include <machine/cputypes.h>
 #include <machine/specialreg.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 
 #include <dev/acpica/acpivar.h>
 
 #include "acpi_if.h"
 #include "cpufreq_if.h"
 
 #define	MSR_AMD_10H_11H_LIMIT	0xc0010061
 #define	MSR_AMD_10H_11H_CONTROL	0xc0010062
 #define	MSR_AMD_10H_11H_STATUS	0xc0010063
 #define	MSR_AMD_10H_11H_CONFIG	0xc0010064
 
 #define	AMD_10H_11H_MAX_STATES	16
 
 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */
 #define	AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)	(((msr) >> 4) & 0x7)
 #define	AMD_10H_11H_GET_PSTATE_LIMIT(msr)	(((msr)) & 0x7)
 /* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */
 #define	AMD_10H_11H_CUR_VID(msr)		(((msr) >> 9) & 0x7F)
 #define	AMD_10H_11H_CUR_DID(msr)		(((msr) >> 6) & 0x07)
 #define	AMD_10H_11H_CUR_FID(msr)		((msr) & 0x3F)
 
 #define	AMD_17H_CUR_VID(msr)			(((msr) >> 14) & 0xFF)
 #define	AMD_17H_CUR_DID(msr)			(((msr) >> 8) & 0x3F)
 #define	AMD_17H_CUR_FID(msr)			((msr) & 0xFF)
 
 #define	HWPSTATE_DEBUG(dev, msg...)			\
 	do {						\
 		if (hwpstate_verbose)			\
 			device_printf(dev, msg);	\
 	} while (0)
 
 /* One P-state table entry, as kept in the driver's softc. */
 struct hwpstate_setting {
 	int	freq;		/* CPU clock in MHz or 100ths of a percent. */
 	int	volts;		/* Voltage in mV. */
 	int	power;		/* Power consumed in mW. */
 	int	lat;		/* Transition latency in us. */
 	int	pstate_id;	/* P-State id */
 };
 
 /* Per-device driver state. */
 struct hwpstate_softc {
 	device_t		dev;	/* Set to the device itself in hwpstate_probe(). */
 	/* P-state table; only the first cfnum entries are meaningful. */
 	struct hwpstate_setting	hwpstate_settings[AMD_10H_11H_MAX_STATES];
 	int			cfnum;	/* Number of entries filled in hwpstate_settings. */
 };
 
 static void	hwpstate_identify(driver_t *driver, device_t parent);
 static int	hwpstate_probe(device_t dev);
 static int	hwpstate_attach(device_t dev);
 static int	hwpstate_detach(device_t dev);
 static int	hwpstate_set(device_t dev, const struct cf_setting *cf);
 static int	hwpstate_get(device_t dev, struct cf_setting *cf);
 static int	hwpstate_settings(device_t dev, struct cf_setting *sets, int *count);
 static int	hwpstate_type(device_t dev, int *type);
 static int	hwpstate_shutdown(device_t dev);
 static int	hwpstate_features(driver_t *driver, u_int *features);
 static int	hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev);
 static int	hwpstate_get_info_from_msr(device_t dev);
 static int	hwpstate_goto_pstate(device_t dev, int pstate_id);
 
 static int	hwpstate_verbose;
 SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN,
     &hwpstate_verbose, 0, "Debug hwpstate");
 
 static int	hwpstate_verify;
 SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
     &hwpstate_verify, 0, "Verify P-state after setting");
 
 /*
  * Method table of the hwpstate driver: device, cpufreq driver and ACPI
  * entry points, closed by an all-zero terminator entry.
  */
 static device_method_t hwpstate_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	hwpstate_identify),
 	DEVMETHOD(device_probe,		hwpstate_probe),
 	DEVMETHOD(device_attach,	hwpstate_attach),
 	DEVMETHOD(device_detach,	hwpstate_detach),
 	DEVMETHOD(device_shutdown,	hwpstate_shutdown),
 
 	/* cpufreq interface */
 	DEVMETHOD(cpufreq_drv_set,	hwpstate_set),
 	DEVMETHOD(cpufreq_drv_get,	hwpstate_get),
 	DEVMETHOD(cpufreq_drv_settings,	hwpstate_settings),
 	DEVMETHOD(cpufreq_drv_type,	hwpstate_type),
 
 	/* ACPI interface */
 	DEVMETHOD(acpi_get_features,	hwpstate_features),
 
 	/* Terminator. */
 	{0, 0}
 };
 
 static devclass_t hwpstate_devclass;
 /* Driver description: name, method table and softc size. */
 static driver_t hwpstate_driver = {
 	"hwpstate",
 	hwpstate_methods,
 	sizeof(struct hwpstate_softc),
 };
 
 DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);
 
 /*
  * Go to Px-state on all cpus considering the limit.
  *
  * The requested id is raised to the current P-state limit read from
  * MSR_AMD_10H_11H_LIMIT when the limit is numerically larger.  The
  * resulting id is written to MSR_AMD_10H_11H_CONTROL on the current CPU
  * and then on every other CPU, binding the current thread to each in turn.
  *
  * When the hwpstate_verify tunable is set, each CPU's
  * MSR_AMD_10H_11H_STATUS is polled until it reports id.
  *
  * Returns 0 on success, or ENXIO when verification is enabled and a CPU
  * does not report the requested P-state within the polling loop.
  *
  * NOTE(review): the thread is bound with sched_bind() and no matching
  * unbind is visible in this function -- confirm that callers restore the
  * binding.
  */
 static int
 hwpstate_goto_pstate(device_t dev, int id)
 {
 	sbintime_t sbt;
 	uint64_t msr;
 	int cpu, i, j, limit;
 
 	/* get the current pstate limit */
 	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
 	limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
 	if (limit > id)
 		id = limit;
 
 	cpu = curcpu;
 	HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
 	/* Go To Px-state */
 	wrmsr(MSR_AMD_10H_11H_CONTROL, id);
 
 	/*
 	 * We are going to the same Px-state on all cpus.
 	 * Probably should take _PSD into account.
 	 */
 	CPU_FOREACH(i) {
 		/* The CPU we started on has already been handled above. */
 		if (i == cpu)
 			continue;
 
 		/* Bind to each cpu. */
 		thread_lock(curthread);
 		sched_bind(curthread, i);
 		thread_unlock(curthread);
 		HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
 		/* Go To Px-state */
 		wrmsr(MSR_AMD_10H_11H_CONTROL, id);
 	}
 
 	/*
 	 * Verify whether each core is in the requested P-state.
 	 */
 	if (hwpstate_verify) {
 		CPU_FOREACH(i) {
 			thread_lock(curthread);
 			sched_bind(curthread, i);
 			thread_unlock(curthread);
 			/*
 			 * Poll up to 100 times, sleeping SBT_1MS / 10 between
 			 * attempts.  Whether that is always long enough is an
 			 * open question.
 			 */
 			for (j = 0; j < 100; j++) {
 				/*
 				 * Read back the current P-state; it is not
 				 * guaranteed to equal id yet.
 				 */
 				msr = rdmsr(MSR_AMD_10H_11H_STATUS);
 				if (msr == id)
 					break;
 				sbt = SBT_1MS / 10;
 				tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
 				    sbt >> tc_precexp, 0);
 			}
 			HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
 			    (int)msr, i);
 			/* Still not there after the last poll: give up. */
 			if (msr != id) {
 				HWPSTATE_DEBUG(dev,
 				    "error: loop is not enough.\n");
 				return (ENXIO);
 			}
 		}
 	}
 
 	return (0);
 }
 
 /*
  * cpufreq "set" method: switch to the P-state whose frequency matches
  * cf->freq according to CPUFREQ_CMP().
  *
  * Returns EINVAL when cf is NULL or no table entry matches; otherwise the
  * result of hwpstate_goto_pstate() for the first matching entry.
  */
 static int
 hwpstate_set(device_t dev, const struct cf_setting *cf)
 {
 	struct hwpstate_softc *sc;
 	struct hwpstate_setting *table;
 	int idx;
 
 	if (cf == NULL)
 		return (EINVAL);
 	sc = device_get_softc(dev);
 	table = sc->hwpstate_settings;
 	for (idx = 0; idx < sc->cfnum; idx++) {
 		if (CPUFREQ_CMP(cf->freq, table[idx].freq))
 			return (hwpstate_goto_pstate(dev,
 			    table[idx].pstate_id));
 	}
 
 	/* No entry with a matching frequency. */
 	return (EINVAL);
 }
 
 /*
  * cpufreq "get" method: report the current setting in *cf.
  *
  * The value read from MSR_AMD_10H_11H_STATUS is used as an index into the
  * softc's P-state table.  Returns EINVAL when cf is NULL or the index is
  * not below sc->cfnum, 0 otherwise.
  */
 static int
 hwpstate_get(device_t dev, struct cf_setting *cf)
 {
 	struct hwpstate_softc *sc;
 	const struct hwpstate_setting *cur;
 	uint64_t msr;
 
 	sc = device_get_softc(dev);
 	if (cf == NULL)
 		return (EINVAL);
 	msr = rdmsr(MSR_AMD_10H_11H_STATUS);
 	if (msr >= sc->cfnum)
 		return (EINVAL);
 
 	cur = &sc->hwpstate_settings[msr];
 	cf->freq = cur->freq;
 	cf->volts = cur->volts;
 	cf->power = cur->power;
 	cf->lat = cur->lat;
 	cf->dev = dev;
 	return (0);
 }
 
 /*
  * cpufreq "settings" method: copy the whole P-state table into sets[].
  *
  * On entry *count is the capacity of sets[]; on success it is set to the
  * number of entries written (sc->cfnum).  Returns EINVAL when sets or
  * count is NULL, E2BIG when the table does not fit, 0 otherwise.
  */
 static int
 hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
 {
 	struct hwpstate_softc *sc;
 	const struct hwpstate_setting *src;
 	struct cf_setting *dst;
 	int idx;
 
 	if (sets == NULL || count == NULL)
 		return (EINVAL);
 	sc = device_get_softc(dev);
 	if (*count < sc->cfnum)
 		return (E2BIG);
 
 	for (idx = 0; idx < sc->cfnum; idx++) {
 		src = &sc->hwpstate_settings[idx];
 		dst = &sets[idx];
 		dst->freq = src->freq;
 		dst->volts = src->volts;
 		dst->power = src->power;
 		dst->lat = src->lat;
 		dst->dev = dev;
 	}
 	*count = sc->cfnum;
 
 	return (0);
 }
 
 /*
  * cpufreq "type" method: report CPUFREQ_TYPE_ABSOLUTE through *type.
  * Returns EINVAL when type is NULL, 0 otherwise.
  */
 static int
 hwpstate_type(device_t dev, int *type)
 {
 
 	if (type != NULL) {
 		*type = CPUFREQ_TYPE_ABSOLUTE;
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Device identify method: add a "hwpstate" child to parent unless one
  * already exists, the CPU vendor/family check fails, the hardware P-state
  * bit is not set in amd_pminfo, or the device is disabled through
  * resource hints.
  */
 static void
 hwpstate_identify(driver_t *driver, device_t parent)
 {
 
 	/* Nothing to do when a hwpstate child is already present. */
 	if (device_find_child(parent, "hwpstate", -1) != NULL)
 		return;
 
-	if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10)
+	if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) &&
+	    cpu_vendor_id != CPU_VENDOR_HYGON)
 		return;
 
 	/*
 	 * Check if hardware pstate enable bit is set.
 	 */
 	if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) {
 		HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n");
 		return;
 	}
 
 	if (resource_disabled("hwpstate", 0))
 		return;
 
 	if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL)
 		device_printf(parent, "hwpstate: add child failed\n");
 }
 
 /*
  * Device probe method.
  *
  * Only unit 0 is accepted.  The P-state table is taken from acpi_perf when
  * that driver is attached and reports CPUFREQ_FLAG_INFO_ONLY, provided its
  * entry count agrees with MSR_AMD_10H_11H_LIMIT; in every other case the
  * table is read from the MSRs.
  *
  * Returns ENXIO for units other than 0 and when acpi_perf handles the
  * transitions itself, the error from hwpstate_get_info_from_msr() when the
  * MSR fallback fails, and 0 on success.
  */
 static int
 hwpstate_probe(device_t dev)
 {
 	struct hwpstate_softc *sc;
 	device_t perf_dev;
 	uint64_t msr;
 	int error, type;
 
 	/*
 	 * Only hwpstate0.
 	 * It goes well with acpi_throttle.
 	 */
 	if (device_get_unit(dev) != 0)
 		return (ENXIO);
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
 	/*
 	 * Check if acpi_perf has INFO only flag.
 	 */
 	perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1);
 	/*
 	 * From here on "error" doubles as a flag: non-zero means the table
 	 * has not been obtained from acpi_perf.
 	 */
 	error = TRUE;
 	if (perf_dev && device_is_attached(perf_dev)) {
 		error = CPUFREQ_DRV_TYPE(perf_dev, &type);
 		if (error == 0) {
 			if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) {
 				/*
 				 * If acpi_perf doesn't have INFO_ONLY flag,
 				 * it will take care of pstate transitions.
 				 */
 				HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
 				return (ENXIO);
 			} else {
 				/*
 				 * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
 				 * we can get _PSS info from acpi_perf
 				 * without going into ACPI.
 				 */
 				HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
 				error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
 			}
 		}
 	}
 
 	if (error == 0) {
 		/*
 		 * Now we get _PSS info from acpi_perf without error.
 		 * Let's check it.
 		 */
 		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
 		if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
 			HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)"
 			    " count mismatch\n", (intmax_t)msr, sc->cfnum);
 			error = TRUE;
 		}
 	}
 
 	/*
 	 * If we cannot get info from acpi_perf,
 	 * Let's get info from MSRs.
 	 */
 	if (error)
 		error = hwpstate_get_info_from_msr(dev);
 	if (error)
 		return (error);
 
 	device_set_desc(dev, "Cool`n'Quiet 2.0");
 	return (0);
 }
 
 /*
  * Device attach method: hands the device to cpufreq_register() and
  * returns its result.
  */
 static int
 hwpstate_attach(device_t dev)
 {
 
 	return (cpufreq_register(dev));
 }
 
 /*
  * Build the softc's P-state table from the P-state MSRs.
  *
  * The number of states is 1 + the maximum P-state value read from
  * MSR_AMD_10H_11H_LIMIT.  For each state i, MSR_AMD_10H_11H_CONFIG + i is
  * read and its fid/did fields are converted to a frequency with a
  * family-specific formula.  volts, power and lat are recorded as
  * CPUFREQ_VAL_UNKNOWN.
  *
  * Returns 0 on success, or ENXIO when an MSR does not have bit 63 set or
  * the CPU family has no conversion formula here.
  */
 static int
 hwpstate_get_info_from_msr(device_t dev)
 {
 	struct hwpstate_softc *sc;
 	struct hwpstate_setting *hwpstate_set;
 	uint64_t msr;
 	int family, i, fid, did;
 
 	family = CPUID_TO_FAMILY(cpu_id);
 	sc = device_get_softc(dev);
 	/* Get pstate count */
 	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
 	sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
 	hwpstate_set = sc->hwpstate_settings;
 	for (i = 0; i < sc->cfnum; i++) {
 		msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
 		/* Bit 63 must be set for the entry to be usable. */
 		if ((msr & ((uint64_t)1 << 63)) == 0) {
 			HWPSTATE_DEBUG(dev, "msr is not valid.\n");
 			return (ENXIO);
 		}
 		did = AMD_10H_11H_CUR_DID(msr);
 		fid = AMD_10H_11H_CUR_FID(msr);
 
 		/* Convert fid/did to frequency. */
 		switch (family) {
 		case 0x11:
 			hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did;
 			break;
 		case 0x10:
 		case 0x12:
 		case 0x15:
 		case 0x16:
 			hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did;
 			break;
 		case 0x17:
+		case 0x18:
 			/* These families use the wider 17h field layout. */
 			did = AMD_17H_CUR_DID(msr);
 			if (did == 0) {
 				/* Avoid dividing by zero below. */
 				HWPSTATE_DEBUG(dev, "unexpected did: 0\n");
 				did = 1;
 			}
 			fid = AMD_17H_CUR_FID(msr);
 			hwpstate_set[i].freq = (200 * fid) / did;
 			break;
 		default:
-			HWPSTATE_DEBUG(dev, "get_info_from_msr: AMD family"
-			    " 0x%02x CPUs are not supported yet\n", family);
+			HWPSTATE_DEBUG(dev, "get_info_from_msr: %s family"
+			    " 0x%02x CPUs are not supported yet\n",
+			    cpu_vendor_id == CPU_VENDOR_HYGON ? "Hygon" : "AMD",
+			    family);
 			return (ENXIO);
 		}
 		hwpstate_set[i].pstate_id = i;
 		/* There was volts calculation, but deleted it. */
 		hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN;
 		hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN;
 		hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN;
 	}
 	return (0);
 }
 
 /*
  * Build the softc's P-state table from acpi_perf's settings.
  *
  * Up to MAX_SETTINGS settings are requested from perf_dev through
  * CPUFREQ_DRV_SETTINGS and the freq/volts/power/lat of each are copied
  * into sc->hwpstate_settings[].  Entry i is accepted only when its spec[0]
  * equals i.
  *
  * Returns 0 on success, ENOMEM when the scratch buffer cannot be
  * allocated, the error from CPUFREQ_DRV_SETTINGS, or ENXIO when the
  * reported number of settings does not fit the local table or an entry
  * does not carry the expected P-state id.
  */
 static int
 hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev)
 {
 	struct hwpstate_softc *sc;
 	struct cf_setting *perf_set;
 	struct hwpstate_setting *hwpstate_set;
 	int count, error, i;
 
 	perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT);
 	if (perf_set == NULL) {
 		HWPSTATE_DEBUG(dev, "nomem\n");
 		return (ENOMEM);
 	}
 	/*
 	 * Fetch settings from acpi_perf.
 	 * Now it is attached, and has info only flag.
 	 */
 	count = MAX_SETTINGS;
 	error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count);
 	if (error) {
 		HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n");
 		goto out;
 	}
 	/*
 	 * The local table holds AMD_10H_11H_MAX_STATES entries, while the
 	 * request above allowed up to MAX_SETTINGS.  Refuse a count that
 	 * would run past the table instead of writing out of bounds.
 	 */
 	if (count < 0 || count > AMD_10H_11H_MAX_STATES) {
 		HWPSTATE_DEBUG(dev, "error: unexpected _PSS count %d.\n",
 		    count);
 		error = ENXIO;
 		goto out;
 	}
 	sc = device_get_softc(dev);
 	sc->cfnum = count;
 	hwpstate_set = sc->hwpstate_settings;
 	for (i = 0; i < count; i++) {
 		if (i == perf_set[i].spec[0]) {
 			hwpstate_set[i].pstate_id = i;
 			hwpstate_set[i].freq = perf_set[i].freq;
 			hwpstate_set[i].volts = perf_set[i].volts;
 			hwpstate_set[i].power = perf_set[i].power;
 			hwpstate_set[i].lat = perf_set[i].lat;
 		} else {
 			HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n");
 			error = ENXIO;
 			goto out;
 		}
 	}
 out:
 	/* perf_set is never NULL here: the allocation was checked above. */
 	free(perf_set, M_TEMP);
 	return (error);
 }
 
 /*
  * Device detach method: request P-state 0 on all CPUs (the outcome of that
  * request is not checked), then return the result of
  * cpufreq_unregister().
  */
 static int
 hwpstate_detach(device_t dev)
 {
 	int rv;
 
 	(void)hwpstate_goto_pstate(dev, 0);
 	rv = cpufreq_unregister(dev);
 	return (rv);
 }
 
 /*
  * Device shutdown method.  Currently a no-op that returns 0; the switch
  * to P-state 0 is left commented out.
  */
 static int
 hwpstate_shutdown(device_t dev)
 {
 
 	/* hwpstate_goto_pstate(dev, 0); */
 	return (0);
 }
 
 /*
  * ACPI get_features method: stores ACPI_CAP_PERF_MSRS in *features and
  * returns 0.  The driver argument is not used.
  */
 static int
 hwpstate_features(driver_t *driver, u_int *features)
 {
 
 	/* Notify the ACPI CPU that we support direct access to MSRs */
 	*features = ACPI_CAP_PERF_MSRS;
 	return (0);
 }
Index: head/sys/x86/include/cputypes.h
===================================================================
--- head/sys/x86/include/cputypes.h	(revision 356939)
+++ head/sys/x86/include/cputypes.h	(revision 356940)
@@ -1,49 +1,50 @@
 /*-
  * Copyright (c) 1993 Christopher G. Demetriou
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _X86_CPUTYPES_H_
 #define	_X86_CPUTYPES_H_
 
 /*
  * Vendors of processor.
  */
 #define	CPU_VENDOR_NSC		0x100b		/* NSC */
 #define	CPU_VENDOR_IBM		0x1014		/* IBM */
 #define	CPU_VENDOR_AMD		0x1022		/* AMD */
 #define	CPU_VENDOR_SIS		0x1039		/* SiS */
 #define	CPU_VENDOR_UMC		0x1060		/* UMC */
 #define	CPU_VENDOR_NEXGEN	0x1074		/* Nexgen */
 #define	CPU_VENDOR_CYRIX	0x1078		/* Cyrix */
 #define	CPU_VENDOR_IDT		0x111d		/* Centaur/IDT/VIA */
 #define	CPU_VENDOR_TRANSMETA	0x1279		/* Transmeta */
 #define	CPU_VENDOR_INTEL	0x8086		/* Intel */
 #define	CPU_VENDOR_RISE		0xdead2bad	/* Rise */
 #define	CPU_VENDOR_CENTAUR	CPU_VENDOR_IDT
+#define	CPU_VENDOR_HYGON	0x1d94		/* Hygon */
 
 #endif /* !_X86_CPUTYPES_H_ */
Index: head/sys/x86/include/specialreg.h
===================================================================
--- head/sys/x86/include/specialreg.h	(revision 356939)
+++ head/sys/x86/include/specialreg.h	(revision 356940)
@@ -1,1165 +1,1166 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)specialreg.h	7.1 (Berkeley) 5/9/91
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_SPECIALREG_H_
 #define	_MACHINE_SPECIALREG_H_
 
 /*
  * Bits in 386 special registers:
  */
 #define	CR0_PE	0x00000001	/* Protected mode Enable */
 #define	CR0_MP	0x00000002	/* "Math" (fpu) Present */
 #define	CR0_EM	0x00000004	/* EMulate FPU instructions. (trap ESC only) */
 #define	CR0_TS	0x00000008	/* Task Switched (if MP, trap ESC and WAIT) */
 #define	CR0_PG	0x80000000	/* PaGing enable */
 
 /*
  * Bits in 486 special registers:
  */
 #define	CR0_NE	0x00000020	/* Numeric Error enable (EX16 vs IRQ13) */
 #define	CR0_WP	0x00010000	/* Write Protect (honor page protect in
 							   all modes) */
 #define	CR0_AM	0x00040000	/* Alignment Mask (set to enable AC flag) */
 #define	CR0_NW  0x20000000	/* Not Write-through */
 #define	CR0_CD  0x40000000	/* Cache Disable */
 
 #define	CR3_PCID_SAVE 0x8000000000000000
 #define	CR3_PCID_MASK 0xfff
 
 /*
  * Bits in PPro special registers
  */
 #define	CR4_VME	0x00000001	/* Virtual 8086 mode extensions */
 #define	CR4_PVI	0x00000002	/* Protected-mode virtual interrupts */
 #define	CR4_TSD	0x00000004	/* Time stamp disable */
 #define	CR4_DE	0x00000008	/* Debugging extensions */
 #define	CR4_PSE	0x00000010	/* Page size extensions */
 #define	CR4_PAE	0x00000020	/* Physical address extension */
 #define	CR4_MCE	0x00000040	/* Machine check enable */
 #define	CR4_PGE	0x00000080	/* Page global enable */
 #define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
 #define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
 #define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
 #define	CR4_UMIP 0x00000800	/* User Mode Instruction Prevention */
 #define	CR4_VMXE 0x00002000	/* enable VMX operation (Intel-specific) */
 #define	CR4_FSGSBASE 0x00010000	/* Enable FS/GS BASE accessing instructions */
 #define	CR4_PCIDE 0x00020000	/* Enable Context ID */
 #define	CR4_XSAVE 0x00040000	/* XSETBV/XGETBV */
 #define	CR4_SMEP 0x00100000	/* Supervisor-Mode Execution Prevention */
 #define	CR4_SMAP 0x00200000	/* Supervisor-Mode Access Prevention */
 #define	CR4_PKE	0x00400000	/* Protection Keys Enable */
 
 /*
  * Bits in AMD64 special registers.  EFER is 64 bits wide.
  */
 #define	EFER_SCE 0x000000001	/* System Call Extensions (R/W) */
 #define	EFER_LME 0x000000100	/* Long mode enable (R/W) */
 #define	EFER_LMA 0x000000400	/* Long mode active (R) */
 #define	EFER_NXE 0x000000800	/* PTE No-Execute bit enable (R/W) */
 #define	EFER_SVM 0x000001000	/* SVM enable bit for AMD, reserved for Intel */
 #define	EFER_LMSLE 0x000002000	/* Long Mode Segment Limit Enable */
 #define	EFER_FFXSR 0x000004000	/* Fast FXSAVE/FXRSTOR */
 #define	EFER_TCE   0x000008000	/* Translation Cache Extension */
 #define	EFER_MCOMMIT	0x00020000	/* Enable MCOMMIT (AMD) */
 
 /*
  * Intel Extended Features registers
  */
 #define	XCR0	0		/* XFEATURE_ENABLED_MASK register */
 
 #define	XFEATURE_ENABLED_X87		0x00000001
 #define	XFEATURE_ENABLED_SSE		0x00000002
 #define	XFEATURE_ENABLED_YMM_HI128	0x00000004
 #define	XFEATURE_ENABLED_AVX		XFEATURE_ENABLED_YMM_HI128
 #define	XFEATURE_ENABLED_BNDREGS	0x00000008
 #define	XFEATURE_ENABLED_BNDCSR		0x00000010
 #define	XFEATURE_ENABLED_OPMASK		0x00000020
 #define	XFEATURE_ENABLED_ZMM_HI256	0x00000040
 #define	XFEATURE_ENABLED_HI16_ZMM	0x00000080
 
 #define	XFEATURE_AVX					\
     (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
 #define	XFEATURE_AVX512						\
     (XFEATURE_ENABLED_OPMASK | XFEATURE_ENABLED_ZMM_HI256 |	\
     XFEATURE_ENABLED_HI16_ZMM)
 #define	XFEATURE_MPX					\
     (XFEATURE_ENABLED_BNDREGS | XFEATURE_ENABLED_BNDCSR)
 
 /*
  * CPUID instruction features register
  */
 #define	CPUID_FPU	0x00000001
 #define	CPUID_VME	0x00000002
 #define	CPUID_DE	0x00000004
 #define	CPUID_PSE	0x00000008
 #define	CPUID_TSC	0x00000010
 #define	CPUID_MSR	0x00000020
 #define	CPUID_PAE	0x00000040
 #define	CPUID_MCE	0x00000080
 #define	CPUID_CX8	0x00000100
 #define	CPUID_APIC	0x00000200
 #define	CPUID_B10	0x00000400
 #define	CPUID_SEP	0x00000800
 #define	CPUID_MTRR	0x00001000
 #define	CPUID_PGE	0x00002000
 #define	CPUID_MCA	0x00004000
 #define	CPUID_CMOV	0x00008000
 #define	CPUID_PAT	0x00010000
 #define	CPUID_PSE36	0x00020000
 #define	CPUID_PSN	0x00040000
 #define	CPUID_CLFSH	0x00080000
 #define	CPUID_B20	0x00100000
 #define	CPUID_DS	0x00200000
 #define	CPUID_ACPI	0x00400000
 #define	CPUID_MMX	0x00800000
 #define	CPUID_FXSR	0x01000000
 #define	CPUID_SSE	0x02000000
 #define	CPUID_XMM	0x02000000
 #define	CPUID_SSE2	0x04000000
 #define	CPUID_SS	0x08000000
 #define	CPUID_HTT	0x10000000
 #define	CPUID_TM	0x20000000
 #define	CPUID_IA64	0x40000000
 #define	CPUID_PBE	0x80000000
 
 #define	CPUID2_SSE3	0x00000001
 #define	CPUID2_PCLMULQDQ 0x00000002
 #define	CPUID2_DTES64	0x00000004
 #define	CPUID2_MON	0x00000008
 #define	CPUID2_DS_CPL	0x00000010
 #define	CPUID2_VMX	0x00000020
 #define	CPUID2_SMX	0x00000040
 #define	CPUID2_EST	0x00000080
 #define	CPUID2_TM2	0x00000100
 #define	CPUID2_SSSE3	0x00000200
 #define	CPUID2_CNXTID	0x00000400
 #define	CPUID2_SDBG	0x00000800
 #define	CPUID2_FMA	0x00001000
 #define	CPUID2_CX16	0x00002000
 #define	CPUID2_XTPR	0x00004000
 #define	CPUID2_PDCM	0x00008000
 #define	CPUID2_PCID	0x00020000
 #define	CPUID2_DCA	0x00040000
 #define	CPUID2_SSE41	0x00080000
 #define	CPUID2_SSE42	0x00100000
 #define	CPUID2_X2APIC	0x00200000
 #define	CPUID2_MOVBE	0x00400000
 #define	CPUID2_POPCNT	0x00800000
 #define	CPUID2_TSCDLT	0x01000000
 #define	CPUID2_AESNI	0x02000000
 #define	CPUID2_XSAVE	0x04000000
 #define	CPUID2_OSXSAVE	0x08000000
 #define	CPUID2_AVX	0x10000000
 #define	CPUID2_F16C	0x20000000
 #define	CPUID2_RDRAND	0x40000000
 #define	CPUID2_HV	0x80000000
 
 /* Intel Processor Trace CPUID. */
 
 /* Leaf 0 ebx. */
 #define	CPUPT_CR3		(1 << 0)	/* CR3 Filtering Support */
 #define	CPUPT_PSB		(1 << 1)	/* Configurable PSB and Cycle-Accurate Mode Supported */
 #define	CPUPT_IPF		(1 << 2)	/* IP Filtering and TraceStop supported */
 #define	CPUPT_MTC		(1 << 3)	/* MTC Supported */
 #define	CPUPT_PRW		(1 << 4)	/* PTWRITE Supported */
 #define	CPUPT_PWR		(1 << 5)	/* Power Event Trace Supported */
 
 /*
  * Leaf 0 ecx.
  *
  * Bit 31 is built from an unsigned constant: shifting a signed 1 into the
  * sign bit is undefined behavior in C, and the resulting negative int would
  * sign-extend when combined with a 64-bit value.
  */
 #define	CPUPT_TOPA		(1 << 0)	/* ToPA Output Supported */
 #define	CPUPT_TOPA_MULTI	(1 << 1)	/* ToPA Tables Allow Multiple Output Entries */
 #define	CPUPT_SINGLE		(1 << 2)	/* Single-Range Output Supported */
 #define	CPUPT_TT_OUT		(1 << 3)	/* Output to Trace Transport Subsystem Supported */
 #define	CPUPT_LINEAR_IP		(1U << 31)	/* IP Payloads are Linear IP, otherwise IP is effective */
 
 /* Leaf 1 eax. */
 #define	CPUPT_NADDR_S		0	/* Number of Address Ranges */
 #define	CPUPT_NADDR_M		(0x7 << CPUPT_NADDR_S)
 #define	CPUPT_MTC_BITMAP_S	16	/* Bitmap of supported MTC Period Encodings */
 #define	CPUPT_MTC_BITMAP_M	(0xffff << CPUPT_MTC_BITMAP_S)
 
 /* Leaf 1 ebx. */
 #define	CPUPT_CT_BITMAP_S	0	/* Bitmap of supported Cycle Threshold values */
 #define	CPUPT_CT_BITMAP_M	(0xffff << CPUPT_CT_BITMAP_S)
 #define	CPUPT_PFE_BITMAP_S	16	/* Bitmap of supported Configurable PSB Frequency encoding */
 #define	CPUPT_PFE_BITMAP_M	(0xffff << CPUPT_PFE_BITMAP_S)
 
 /*
  * Important bits in the AMD extended cpuid flags
  */
 #define	AMDID_SYSCALL	0x00000800
 #define	AMDID_MP	0x00080000
 #define	AMDID_NX	0x00100000
 #define	AMDID_EXT_MMX	0x00400000
 #define	AMDID_FFXSR	0x02000000
 #define	AMDID_PAGE1GB	0x04000000
 #define	AMDID_RDTSCP	0x08000000
 #define	AMDID_LM	0x20000000
 #define	AMDID_EXT_3DNOW	0x40000000
 #define	AMDID_3DNOW	0x80000000
 
 #define	AMDID2_LAHF	0x00000001
 #define	AMDID2_CMP	0x00000002
 #define	AMDID2_SVM	0x00000004
 #define	AMDID2_EXT_APIC	0x00000008
 #define	AMDID2_CR8	0x00000010
 #define	AMDID2_ABM	0x00000020
 #define	AMDID2_SSE4A	0x00000040
 #define	AMDID2_MAS	0x00000080
 #define	AMDID2_PREFETCH	0x00000100
 #define	AMDID2_OSVW	0x00000200
 #define	AMDID2_IBS	0x00000400
 #define	AMDID2_XOP	0x00000800
 #define	AMDID2_SKINIT	0x00001000
 #define	AMDID2_WDT	0x00002000
 #define	AMDID2_LWP	0x00008000
 #define	AMDID2_FMA4	0x00010000
 #define	AMDID2_TCE	0x00020000
 #define	AMDID2_NODE_ID	0x00080000
 #define	AMDID2_TBM	0x00200000
 #define	AMDID2_TOPOLOGY	0x00400000
 #define	AMDID2_PCXC	0x00800000
 #define	AMDID2_PNXC	0x01000000
 #define	AMDID2_DBE	0x04000000
 #define	AMDID2_PTSC	0x08000000
 #define	AMDID2_PTSCEL2I	0x10000000
 #define	AMDID2_MWAITX	0x20000000
 
 /*
  * CPUID instruction 1 eax info
  *
  * The masks select the individual signature fields.  CPUID_TO_MODEL() and
  * CPUID_TO_FAMILY() combine the base fields with the extended ones:
  * the extended model becomes the high nibble of the model and the
  * extended family is added to the base family.
  */
 #define	CPUID_STEPPING		0x0000000f
 #define	CPUID_MODEL		0x000000f0
 #define	CPUID_FAMILY		0x00000f00
 #define	CPUID_EXT_MODEL		0x000f0000
 #define	CPUID_EXT_FAMILY	0x0ff00000
 #ifndef __i386__
 /* Extended fields are always folded in. */
 #define	CPUID_TO_MODEL(id)						\
     ((((id) & CPUID_EXT_MODEL) >> 12) | (((id) & CPUID_MODEL) >> 4))
 #define	CPUID_TO_FAMILY(id)						\
     ((((id) & CPUID_EXT_FAMILY) >> 20) + (((id) & CPUID_FAMILY) >> 8))
 #else
 /*
  * i386: the extended model is used only when the base family is at least 6,
  * and the extended family only when the base family is 0xf.
  */
 #define	CPUID_TO_MODEL(id)						\
     ((((id) & CPUID_MODEL) >> 4) |					\
     ((((id) & CPUID_FAMILY) < 0x600) ?					\
     0 : (((id) & CPUID_EXT_MODEL) >> 12)))
 #define	CPUID_TO_FAMILY(id)						\
     ((((id) & CPUID_FAMILY) >> 8) +					\
     ((((id) & CPUID_FAMILY) != 0xf00) ?				\
     0 : (((id) & CPUID_EXT_FAMILY) >> 20)))
 #endif
 
 /*
  * CPUID instruction 1 ebx info
  */
 #define	CPUID_BRAND_INDEX	0x000000ff
 #define	CPUID_CLFUSH_SIZE	0x0000ff00
 #define	CPUID_HTT_CORES		0x00ff0000
 #define	CPUID_LOCAL_APIC_ID	0xff000000
 
 /*
  * CPUID instruction 5 info
  */
 #define	CPUID5_MON_MIN_SIZE	0x0000ffff	/* eax */
 #define	CPUID5_MON_MAX_SIZE	0x0000ffff	/* ebx */
 #define	CPUID5_MON_MWAIT_EXT	0x00000001	/* ecx */
 #define	CPUID5_MWAIT_INTRBREAK	0x00000002	/* ecx */
 
 /*
  * MWAIT cpu power states.  Lower 4 bits are sub-states.
  */
 #define	MWAIT_C0	0xf0
 #define	MWAIT_C1	0x00
 #define	MWAIT_C2	0x10
 #define	MWAIT_C3	0x20
 #define	MWAIT_C4	0x30
 
 /*
  * MWAIT extensions.
  */
 /* Interrupt breaks MWAIT even when masked. */
 #define	MWAIT_INTRBREAK		0x00000001
 
 /*
  * CPUID leaf 6: Thermal and Power management.
  */
 /* Eax. */
 #define	CPUTPM1_SENSOR			0x00000001
 #define	CPUTPM1_TURBO			0x00000002
 #define	CPUTPM1_ARAT			0x00000004
 #define	CPUTPM1_PLN			0x00000010
 #define	CPUTPM1_ECMD			0x00000020
 #define	CPUTPM1_PTM			0x00000040
 #define	CPUTPM1_HWP			0x00000080
 #define	CPUTPM1_HWP_NOTIFICATION	0x00000100
 #define	CPUTPM1_HWP_ACTIVITY_WINDOW	0x00000200
 #define	CPUTPM1_HWP_PERF_PREF		0x00000400
 #define	CPUTPM1_HWP_PKG			0x00000800
 #define	CPUTPM1_HDC			0x00002000
 #define	CPUTPM1_TURBO30			0x00004000
 #define	CPUTPM1_HWP_CAPABILITIES	0x00008000
 #define	CPUTPM1_HWP_PECI_OVR		0x00010000
 #define	CPUTPM1_HWP_FLEXIBLE		0x00020000
 #define	CPUTPM1_HWP_FAST_MSR		0x00040000
 #define	CPUTPM1_HWP_IGN_IDLE		0x00100000
 
 /* Ebx. */
 #define	CPUTPM_B_NSENSINTTHRESH		0x0000000f
 
 /* Ecx. */
 #define	CPUID_PERF_STAT			0x00000001
 #define	CPUID_PERF_BIAS			0x00000008
 
 /* 
  * CPUID instruction 0xb ebx info.
  */
 #define	CPUID_TYPE_INVAL	0
 #define	CPUID_TYPE_SMT		1
 #define	CPUID_TYPE_CORE		2
 
 /*
  * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
  */
 #define	CPUID_EXTSTATE_XSAVEOPT	0x00000001
 #define	CPUID_EXTSTATE_XSAVEC	0x00000002
 #define	CPUID_EXTSTATE_XINUSE	0x00000004
 #define	CPUID_EXTSTATE_XSAVES	0x00000008
 
 /*
  * AMD extended function 8000_0007h ebx info
  */
 #define	AMDRAS_MCA_OF_RECOV	0x00000001
 #define	AMDRAS_SUCCOR		0x00000002
 #define	AMDRAS_HW_ASSERT	0x00000004
 #define	AMDRAS_SCALABLE_MCA	0x00000008
 #define	AMDRAS_PFEH_SUPPORT	0x00000010
 
 /*
  * AMD extended function 8000_0007h edx info
  */
 #define	AMDPM_TS		0x00000001
 #define	AMDPM_FID		0x00000002
 #define	AMDPM_VID		0x00000004
 #define	AMDPM_TTP		0x00000008
 #define	AMDPM_TM		0x00000010
 #define	AMDPM_STC		0x00000020
 #define	AMDPM_100MHZ_STEPS	0x00000040
 #define	AMDPM_HW_PSTATE		0x00000080
 #define	AMDPM_TSC_INVARIANT	0x00000100
 #define	AMDPM_CPB		0x00000200
 
 /*
  * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions)
  */
 #define	AMDFEID_CLZERO		0x00000001
 #define	AMDFEID_IRPERF		0x00000002
 #define	AMDFEID_XSAVEERPTR	0x00000004
 #define	AMDFEID_RDPRU		0x00000010
 #define	AMDFEID_MCOMMIT		0x00000100
 #define	AMDFEID_WBNOINVD	0x00000200
 #define	AMDFEID_IBPB		0x00001000
 #define	AMDFEID_IBRS		0x00004000
 #define	AMDFEID_STIBP		0x00008000
 /* The below are only defined if the corresponding base feature above exists. */
 #define	AMDFEID_IBRS_ALWAYSON	0x00010000
 #define	AMDFEID_STIBP_ALWAYSON	0x00020000
 #define	AMDFEID_PREFER_IBRS	0x00040000
 #define	AMDFEID_SSBD		0x01000000
 /* SSBD via MSRC001_011F instead of MSR 0x48: */
 #define	AMDFEID_VIRT_SSBD	0x02000000
 #define	AMDFEID_SSB_NO		0x04000000
 
 /*
  * AMD extended function 8000_0008h ecx info
  */
 #define	AMDID_CMP_CORES		0x000000ff
 #define	AMDID_COREID_SIZE	0x0000f000
 #define	AMDID_COREID_SIZE_SHIFT	12
 
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 ebx info
  */
 #define	CPUID_STDEXT_FSGSBASE	0x00000001
 #define	CPUID_STDEXT_TSC_ADJUST	0x00000002
 #define	CPUID_STDEXT_SGX	0x00000004
 #define	CPUID_STDEXT_BMI1	0x00000008
 #define	CPUID_STDEXT_HLE	0x00000010
 #define	CPUID_STDEXT_AVX2	0x00000020
 #define	CPUID_STDEXT_FDP_EXC	0x00000040
 #define	CPUID_STDEXT_SMEP	0x00000080
 #define	CPUID_STDEXT_BMI2	0x00000100
 #define	CPUID_STDEXT_ERMS	0x00000200
 #define	CPUID_STDEXT_INVPCID	0x00000400
 #define	CPUID_STDEXT_RTM	0x00000800
 #define	CPUID_STDEXT_PQM	0x00001000
 #define	CPUID_STDEXT_NFPUSG	0x00002000
 #define	CPUID_STDEXT_MPX	0x00004000
 #define	CPUID_STDEXT_PQE	0x00008000
 #define	CPUID_STDEXT_AVX512F	0x00010000
 #define	CPUID_STDEXT_AVX512DQ	0x00020000
 #define	CPUID_STDEXT_RDSEED	0x00040000
 #define	CPUID_STDEXT_ADX	0x00080000
 #define	CPUID_STDEXT_SMAP	0x00100000
 #define	CPUID_STDEXT_AVX512IFMA	0x00200000
 /* Formerly PCOMMIT */
 #define	CPUID_STDEXT_CLFLUSHOPT	0x00800000
 #define	CPUID_STDEXT_CLWB	0x01000000
 #define	CPUID_STDEXT_PROCTRACE	0x02000000
 #define	CPUID_STDEXT_AVX512PF	0x04000000
 #define	CPUID_STDEXT_AVX512ER	0x08000000
 #define	CPUID_STDEXT_AVX512CD	0x10000000
 #define	CPUID_STDEXT_SHA	0x20000000
 #define	CPUID_STDEXT_AVX512BW	0x40000000
 #define	CPUID_STDEXT_AVX512VL	0x80000000
 
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info
  */
 #define	CPUID_STDEXT2_PREFETCHWT1 	0x00000001
 #define	CPUID_STDEXT2_AVX512VBMI	0x00000002
 #define	CPUID_STDEXT2_UMIP		0x00000004
 #define	CPUID_STDEXT2_PKU		0x00000008
 #define	CPUID_STDEXT2_OSPKE		0x00000010
 #define	CPUID_STDEXT2_WAITPKG		0x00000020
 #define	CPUID_STDEXT2_AVX512VBMI2	0x00000040
 #define	CPUID_STDEXT2_GFNI		0x00000100
 #define	CPUID_STDEXT2_VAES		0x00000200
 #define	CPUID_STDEXT2_VPCLMULQDQ	0x00000400
 #define	CPUID_STDEXT2_AVX512VNNI	0x00000800
 #define	CPUID_STDEXT2_AVX512BITALG	0x00001000
 #define	CPUID_STDEXT2_AVX512VPOPCNTDQ	0x00004000
 #define	CPUID_STDEXT2_RDPID		0x00400000
 #define	CPUID_STDEXT2_CLDEMOTE		0x02000000
 #define	CPUID_STDEXT2_MOVDIRI		0x08000000
 #define	CPUID_STDEXT2_MOVDIR64B		0x10000000
 #define	CPUID_STDEXT2_ENQCMD		0x20000000
 #define	CPUID_STDEXT2_SGXLC		0x40000000
 
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 edx info
  */
 #define	CPUID_STDEXT3_AVX5124VNNIW	0x00000004
 #define	CPUID_STDEXT3_AVX5124FMAPS	0x00000008
 #define	CPUID_STDEXT3_AVX512VP2INTERSECT	0x00000100
 #define	CPUID_STDEXT3_MD_CLEAR		0x00000400
 #define	CPUID_STDEXT3_TSXFA		0x00002000
 #define	CPUID_STDEXT3_PCONFIG		0x00040000
 #define	CPUID_STDEXT3_IBPB		0x04000000
 #define	CPUID_STDEXT3_STIBP		0x08000000
 #define	CPUID_STDEXT3_L1D_FLUSH		0x10000000
 #define	CPUID_STDEXT3_ARCH_CAP		0x20000000
 #define	CPUID_STDEXT3_CORE_CAP		0x40000000
 #define	CPUID_STDEXT3_SSBD		0x80000000
 
 /* MSR IA32_ARCH_CAP(ABILITIES) bits */
 #define	IA32_ARCH_CAP_RDCL_NO	0x00000001
 #define	IA32_ARCH_CAP_IBRS_ALL	0x00000002
 #define	IA32_ARCH_CAP_RSBA	0x00000004
 #define	IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY	0x00000008
 #define	IA32_ARCH_CAP_SSB_NO	0x00000010
 #define	IA32_ARCH_CAP_MDS_NO	0x00000020
 #define	IA32_ARCH_CAP_IF_PSCHANGE_MC_NO	0x00000040
 #define	IA32_ARCH_CAP_TSX_CTRL	0x00000080
 #define	IA32_ARCH_CAP_TAA_NO	0x00000100
 
 /* MSR IA32_TSX_CTRL bits */
 #define	IA32_TSX_CTRL_RTM_DISABLE	0x00000001
 #define	IA32_TSX_CTRL_TSX_CPUID_CLEAR	0x00000002
 
 /*
  * CPUID manufacturers identifiers
  */
 #define	AMD_VENDOR_ID		"AuthenticAMD"
 #define	CENTAUR_VENDOR_ID	"CentaurHauls"
 #define	CYRIX_VENDOR_ID		"CyrixInstead"
 #define	INTEL_VENDOR_ID		"GenuineIntel"
 #define	NEXGEN_VENDOR_ID	"NexGenDriven"
 #define	NSC_VENDOR_ID		"Geode by NSC"
 #define	RISE_VENDOR_ID		"RiseRiseRise"
 #define	SIS_VENDOR_ID		"SiS SiS SiS "
 #define	TRANSMETA_VENDOR_ID	"GenuineTMx86"
 #define	UMC_VENDOR_ID		"UMC UMC UMC "
+#define	HYGON_VENDOR_ID		"HygonGenuine"
 
 /*
  * Model-specific registers for the i386 family
  */
 #define	MSR_P5_MC_ADDR		0x000
 #define	MSR_P5_MC_TYPE		0x001
 #define	MSR_TSC			0x010
 #define	MSR_P5_CESR		0x011
 #define	MSR_P5_CTR0		0x012
 #define	MSR_P5_CTR1		0x013
 #define	MSR_IA32_PLATFORM_ID	0x017
 #define	MSR_APICBASE		0x01b
 #define	MSR_EBL_CR_POWERON	0x02a
 #define	MSR_TEST_CTL		0x033
 #define	MSR_IA32_FEATURE_CONTROL 0x03a
 #define	MSR_IA32_SPEC_CTRL	0x048
 #define	MSR_IA32_PRED_CMD	0x049
 #define	MSR_BIOS_UPDT_TRIG	0x079
 #define	MSR_BBL_CR_D0		0x088
 #define	MSR_BBL_CR_D1		0x089
 #define	MSR_BBL_CR_D2		0x08a
 #define	MSR_BIOS_SIGN		0x08b
 #define	MSR_PERFCTR0		0x0c1
 #define	MSR_PERFCTR1		0x0c2
 #define	MSR_PLATFORM_INFO	0x0ce
 #define	MSR_MPERF		0x0e7
 #define	MSR_APERF		0x0e8
 #define	MSR_IA32_EXT_CONFIG	0x0ee	/* Undocumented. Core Solo/Duo only */
 #define	MSR_MTRRcap		0x0fe
 #define	MSR_IA32_ARCH_CAP	0x10a
 #define	MSR_IA32_FLUSH_CMD	0x10b
 #define	MSR_TSX_FORCE_ABORT	0x10f
 #define	MSR_BBL_CR_ADDR		0x116
 #define	MSR_BBL_CR_DECC		0x118
 #define	MSR_BBL_CR_CTL		0x119
 #define	MSR_BBL_CR_TRIG		0x11a
 #define	MSR_BBL_CR_BUSY		0x11b
 #define	MSR_BBL_CR_CTL3		0x11e
 #define	MSR_IA32_TSX_CTRL	0x122
 #define	MSR_SYSENTER_CS_MSR	0x174
 #define	MSR_SYSENTER_ESP_MSR	0x175
 #define	MSR_SYSENTER_EIP_MSR	0x176
 #define	MSR_MCG_CAP		0x179
 #define	MSR_MCG_STATUS		0x17a
 #define	MSR_MCG_CTL		0x17b
 #define	MSR_EVNTSEL0		0x186
 #define	MSR_EVNTSEL1		0x187
 #define	MSR_THERM_CONTROL	0x19a
 #define	MSR_THERM_INTERRUPT	0x19b
 #define	MSR_THERM_STATUS	0x19c
 #define	MSR_IA32_MISC_ENABLE	0x1a0
 #define	MSR_IA32_TEMPERATURE_TARGET	0x1a2
 #define	MSR_TURBO_RATIO_LIMIT	0x1ad
 #define	MSR_TURBO_RATIO_LIMIT1	0x1ae
 #define	MSR_DEBUGCTLMSR		0x1d9
 #define	MSR_LASTBRANCHFROMIP	0x1db
 #define	MSR_LASTBRANCHTOIP	0x1dc
 #define	MSR_LASTINTFROMIP	0x1dd
 #define	MSR_LASTINTTOIP		0x1de
 #define	MSR_ROB_CR_BKUPTMPDR6	0x1e0
 #define	MSR_MTRRVarBase		0x200
 #define	MSR_MTRR64kBase		0x250
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
 #define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
 #define	MSR_MC0_ADDR		0x402
 #define	MSR_MC0_MISC		0x403
 #define	MSR_MC1_CTL		0x404
 #define	MSR_MC1_STATUS		0x405
 #define	MSR_MC1_ADDR		0x406
 #define	MSR_MC1_MISC		0x407
 #define	MSR_MC2_CTL		0x408
 #define	MSR_MC2_STATUS		0x409
 #define	MSR_MC2_ADDR		0x40a
 #define	MSR_MC2_MISC		0x40b
 #define	MSR_MC3_CTL		0x40c
 #define	MSR_MC3_STATUS		0x40d
 #define	MSR_MC3_ADDR		0x40e
 #define	MSR_MC3_MISC		0x40f
 #define	MSR_MC4_CTL		0x410
 #define	MSR_MC4_STATUS		0x411
 #define	MSR_MC4_ADDR		0x412
 #define	MSR_MC4_MISC		0x413
 #define	MSR_RAPL_POWER_UNIT	0x606
 #define	MSR_PKG_ENERGY_STATUS	0x611
 #define	MSR_DRAM_ENERGY_STATUS	0x619
 #define	MSR_PP0_ENERGY_STATUS	0x639
 #define	MSR_PP1_ENERGY_STATUS	0x641
 #define	MSR_PPERF		0x64e
 #define	MSR_TSC_DEADLINE	0x6e0	/* Writes are not serializing */
 #define	MSR_IA32_PM_ENABLE	0x770
 #define	MSR_IA32_HWP_CAPABILITIES	0x771
 #define	MSR_IA32_HWP_REQUEST_PKG	0x772
 #define	MSR_IA32_HWP_INTERRUPT		0x773
 #define	MSR_IA32_HWP_REQUEST	0x774
 #define	MSR_IA32_HWP_STATUS	0x777
 
 /*
  * VMX MSRs
  */
 #define	MSR_VMX_BASIC		0x480
 #define	MSR_VMX_PINBASED_CTLS	0x481
 #define	MSR_VMX_PROCBASED_CTLS	0x482
 #define	MSR_VMX_EXIT_CTLS	0x483
 #define	MSR_VMX_ENTRY_CTLS	0x484
 #define	MSR_VMX_CR0_FIXED0	0x486
 #define	MSR_VMX_CR0_FIXED1	0x487
 #define	MSR_VMX_CR4_FIXED0	0x488
 #define	MSR_VMX_CR4_FIXED1	0x489
 #define	MSR_VMX_PROCBASED_CTLS2	0x48b
 #define	MSR_VMX_EPT_VPID_CAP	0x48c
 #define	MSR_VMX_TRUE_PINBASED_CTLS	0x48d
 #define	MSR_VMX_TRUE_PROCBASED_CTLS	0x48e
 #define	MSR_VMX_TRUE_EXIT_CTLS	0x48f
 #define	MSR_VMX_TRUE_ENTRY_CTLS	0x490
 
 /*
  * X2APIC MSRs.
  * Writes are not serializing.
  */
 #define	MSR_APIC_000		0x800
 #define	MSR_APIC_ID		0x802
 #define	MSR_APIC_VERSION	0x803
 #define	MSR_APIC_TPR		0x808
 #define	MSR_APIC_EOI		0x80b
 #define	MSR_APIC_LDR		0x80d
 #define	MSR_APIC_SVR		0x80f
 #define	MSR_APIC_ISR0		0x810
 #define	MSR_APIC_ISR1		0x811
 #define	MSR_APIC_ISR2		0x812
 #define	MSR_APIC_ISR3		0x813
 #define	MSR_APIC_ISR4		0x814
 #define	MSR_APIC_ISR5		0x815
 #define	MSR_APIC_ISR6		0x816
 #define	MSR_APIC_ISR7		0x817
 #define	MSR_APIC_TMR0		0x818
 #define	MSR_APIC_IRR0		0x820
 #define	MSR_APIC_ESR		0x828
 #define	MSR_APIC_LVT_CMCI	0x82F
 #define	MSR_APIC_ICR		0x830
 #define	MSR_APIC_LVT_TIMER	0x832
 #define	MSR_APIC_LVT_THERMAL	0x833
 #define	MSR_APIC_LVT_PCINT	0x834
 #define	MSR_APIC_LVT_LINT0	0x835
 #define	MSR_APIC_LVT_LINT1	0x836
 #define	MSR_APIC_LVT_ERROR	0x837
 #define	MSR_APIC_ICR_TIMER	0x838
 #define	MSR_APIC_CCR_TIMER	0x839
 #define	MSR_APIC_DCR_TIMER	0x83e
 #define	MSR_APIC_SELF_IPI	0x83f
 
 #define	MSR_IA32_XSS		0xda0
 
 /*
  * Intel Processor Trace (PT) MSRs.
  */
 #define	MSR_IA32_RTIT_OUTPUT_BASE	0x560	/* Trace Output Base Register (R/W) */
 #define	MSR_IA32_RTIT_OUTPUT_MASK_PTRS	0x561	/* Trace Output Mask Pointers Register (R/W) */
 #define	MSR_IA32_RTIT_CTL		0x570	/* Trace Control Register (R/W) */
 #define	 RTIT_CTL_TRACEEN	(1 << 0)
 #define	 RTIT_CTL_CYCEN		(1 << 1)
 #define	 RTIT_CTL_OS		(1 << 2)
 #define	 RTIT_CTL_USER		(1 << 3)
 #define	 RTIT_CTL_PWREVTEN	(1 << 4)
 #define	 RTIT_CTL_FUPONPTW	(1 << 5)
 #define	 RTIT_CTL_FABRICEN	(1 << 6)
 #define	 RTIT_CTL_CR3FILTER	(1 << 7)
 #define	 RTIT_CTL_TOPA		(1 << 8)
 #define	 RTIT_CTL_MTCEN		(1 << 9)
 #define	 RTIT_CTL_TSCEN		(1 << 10)
 #define	 RTIT_CTL_DISRETC	(1 << 11)
 #define	 RTIT_CTL_PTWEN		(1 << 12)
 #define	 RTIT_CTL_BRANCHEN	(1 << 13)
 #define	 RTIT_CTL_MTC_FREQ_S	14
 #define	 RTIT_CTL_MTC_FREQ(n)	((n) << RTIT_CTL_MTC_FREQ_S)
 #define	 RTIT_CTL_MTC_FREQ_M	(0xf << RTIT_CTL_MTC_FREQ_S)
 #define	 RTIT_CTL_CYC_THRESH_S	19
 #define	 RTIT_CTL_CYC_THRESH_M	(0xf << RTIT_CTL_CYC_THRESH_S)
 #define	 RTIT_CTL_PSB_FREQ_S	24
 #define	 RTIT_CTL_PSB_FREQ_M	(0xf << RTIT_CTL_PSB_FREQ_S)
 #define	 RTIT_CTL_ADDR_CFG_S(n) (32 + (n) * 4)
 #define	 RTIT_CTL_ADDR0_CFG_S	32
 #define	 RTIT_CTL_ADDR0_CFG_M	(0xfULL << RTIT_CTL_ADDR0_CFG_S)
 #define	 RTIT_CTL_ADDR1_CFG_S	36
 #define	 RTIT_CTL_ADDR1_CFG_M	(0xfULL << RTIT_CTL_ADDR1_CFG_S)
 #define	 RTIT_CTL_ADDR2_CFG_S	40
 #define	 RTIT_CTL_ADDR2_CFG_M	(0xfULL << RTIT_CTL_ADDR2_CFG_S)
 #define	 RTIT_CTL_ADDR3_CFG_S	44
 #define	 RTIT_CTL_ADDR3_CFG_M	(0xfULL << RTIT_CTL_ADDR3_CFG_S)
 #define	MSR_IA32_RTIT_STATUS		0x571	/* Tracing Status Register (R/W) */
 #define	 RTIT_STATUS_FILTEREN	(1 << 0)
 #define	 RTIT_STATUS_CONTEXTEN	(1 << 1)
 #define	 RTIT_STATUS_TRIGGEREN	(1 << 2)
 #define	 RTIT_STATUS_ERROR	(1 << 4)
 #define	 RTIT_STATUS_STOPPED	(1 << 5)
 #define	 RTIT_STATUS_PACKETBYTECNT_S	32
 #define	 RTIT_STATUS_PACKETBYTECNT_M	(0x1ffffULL << RTIT_STATUS_PACKETBYTECNT_S)
 #define	MSR_IA32_RTIT_CR3_MATCH		0x572	/* Trace Filter CR3 Match Register (R/W) */
 #define	MSR_IA32_RTIT_ADDR_A(n)		(0x580 + (n) * 2)
 #define	MSR_IA32_RTIT_ADDR_B(n)		(0x581 + (n) * 2)
 #define	MSR_IA32_RTIT_ADDR0_A		0x580	/* Region 0 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR0_B		0x581	/* Region 0 End Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR1_A		0x582	/* Region 1 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR1_B		0x583	/* Region 1 End Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR2_A		0x584	/* Region 2 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR2_B		0x585	/* Region 2 End Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR3_A		0x586	/* Region 3 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR3_B		0x587	/* Region 3 End Address (R/W) */
 
 /* Intel Processor Trace Table of Physical Addresses (ToPA). */
 #define	TOPA_SIZE_S	6
 #define	TOPA_SIZE_M	(0xf << TOPA_SIZE_S)
 #define	TOPA_SIZE_4K	(0 << TOPA_SIZE_S)
 #define	TOPA_SIZE_8K	(1 << TOPA_SIZE_S)
 #define	TOPA_SIZE_16K	(2 << TOPA_SIZE_S)
 #define	TOPA_SIZE_32K	(3 << TOPA_SIZE_S)
 #define	TOPA_SIZE_64K	(4 << TOPA_SIZE_S)
 #define	TOPA_SIZE_128K	(5 << TOPA_SIZE_S)
 #define	TOPA_SIZE_256K	(6 << TOPA_SIZE_S)
 #define	TOPA_SIZE_512K	(7 << TOPA_SIZE_S)
 #define	TOPA_SIZE_1M	(8 << TOPA_SIZE_S)
 #define	TOPA_SIZE_2M	(9 << TOPA_SIZE_S)
 #define	TOPA_SIZE_4M	(10 << TOPA_SIZE_S)
 #define	TOPA_SIZE_8M	(11 << TOPA_SIZE_S)
 #define	TOPA_SIZE_16M	(12 << TOPA_SIZE_S)
 #define	TOPA_SIZE_32M	(13 << TOPA_SIZE_S)
 #define	TOPA_SIZE_64M	(14 << TOPA_SIZE_S)
 #define	TOPA_SIZE_128M	(15 << TOPA_SIZE_S)
 #define	TOPA_STOP	(1 << 4)
 #define	TOPA_INT	(1 << 2)
 #define	TOPA_END	(1 << 0)
 
 /*
  * Constants related to MSR's.
  */
 #define	APICBASE_RESERVED	0x000002ff
 #define	APICBASE_BSP		0x00000100
 #define	APICBASE_X2APIC		0x00000400
 #define	APICBASE_ENABLED	0x00000800
 #define	APICBASE_ADDRESS	0xfffff000
 
 /* MSR_IA32_FEATURE_CONTROL related */
 #define	IA32_FEATURE_CONTROL_LOCK	0x01	/* lock bit */
 #define	IA32_FEATURE_CONTROL_SMX_EN	0x02	/* enable VMX inside SMX */
 #define	IA32_FEATURE_CONTROL_VMX_EN	0x04	/* enable VMX outside SMX */
 
 /* MSR IA32_MISC_ENABLE */
 #define	IA32_MISC_EN_FASTSTR	0x0000000000000001ULL
 #define	IA32_MISC_EN_ATCCE	0x0000000000000008ULL
 #define	IA32_MISC_EN_PERFMON	0x0000000000000080ULL
 #define	IA32_MISC_EN_PEBSU	0x0000000000001000ULL
 #define	IA32_MISC_EN_ESSTE	0x0000000000010000ULL
 #define	IA32_MISC_EN_MONE	0x0000000000040000ULL
 #define	IA32_MISC_EN_LIMCPUID	0x0000000000400000ULL
 #define	IA32_MISC_EN_xTPRD	0x0000000000800000ULL
 #define	IA32_MISC_EN_XDD	0x0000000400000000ULL
 
 /*
  * IA32_SPEC_CTRL and IA32_PRED_CMD MSRs are described in Intel's
  * document 336996-001 Speculative Execution Side Channel Mitigations.
  *
  * AMD uses the same MSRs and bit definitions, as described in 111006-B
  * "Indirect Branch Control Extension" and 124441 "Speculative Store Bypass
  * Disable."
  */
 /* MSR IA32_SPEC_CTRL */
 #define	IA32_SPEC_CTRL_IBRS	0x00000001
 #define	IA32_SPEC_CTRL_STIBP	0x00000002
 #define	IA32_SPEC_CTRL_SSBD	0x00000004
 
 /* MSR IA32_PRED_CMD */
 #define	IA32_PRED_CMD_IBPB_BARRIER	0x0000000000000001ULL
 
 /* MSR IA32_FLUSH_CMD */
 #define	IA32_FLUSH_CMD_L1D	0x00000001
 
 /*
  * MSR IA32_HWP_CAPABILITIES field extractors.  Each macro yields one 8-bit
  * field of the raw value x.
  */
 #define	IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(x)	(0xff & (x))		/* bits 7:0 */
 #define	IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(x)	(0xff & ((x) >> 8))	/* bits 15:8 */
 #define	IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(x)	(0xff & ((x) >> 16))	/* bits 23:16 */
 #define	IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(x)	(0xff & ((x) >> 24))	/* bits 31:24 */
 
 /* MSR IA32_HWP_REQUEST */
 #define	IA32_HWP_REQUEST_MINIMUM_VALID			(1ULL << 63)
 #define	IA32_HWP_REQUEST_MAXIMUM_VALID			(1ULL << 62)
 #define	IA32_HWP_REQUEST_DESIRED_VALID			(1ULL << 61)
 #define	IA32_HWP_REQUEST_EPP_VALID 			(1ULL << 60)
 #define	IA32_HWP_REQUEST_ACTIVITY_WINDOW_VALID		(1ULL << 59)
 #define	IA32_HWP_REQUEST_PACKAGE_CONTROL		(1ULL << 42)
 #define	IA32_HWP_ACTIVITY_WINDOW			(0x3ffULL << 32)
 #define	IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE	(0xffULL << 24)
 #define	IA32_HWP_DESIRED_PERFORMANCE			(0xffULL << 16)
 #define	IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE		(0xffULL << 8)
 #define	IA32_HWP_MINIMUM_PERFORMANCE			(0xffULL << 0)
 
 /*
  * PAT modes.
  */
 #define	PAT_UNCACHEABLE		0x00
 #define	PAT_WRITE_COMBINING	0x01
 #define	PAT_WRITE_THROUGH	0x04
 #define	PAT_WRITE_PROTECTED	0x05
 #define	PAT_WRITE_BACK		0x06
 #define	PAT_UNCACHED		0x07
 /*
  * PAT_VALUE(i, m) positions mode m in entry i (entries are 8 bits apart);
  * PAT_MASK(i) covers all 8 bits of entry i.  The shift is performed on an
  * unsigned 64-bit value so that entry 7 (bits 63:56) never shifts into a
  * sign bit, which would be undefined behavior and produce a negative mask.
  */
 #define	PAT_VALUE(i, m)		((unsigned long long)(m) << (8 * (i)))
 #define	PAT_MASK(i)		PAT_VALUE(i, 0xff)
 
 /*
  * Constants related to MTRRs
  */
 #define	MTRR_UNCACHEABLE	0x00
 #define	MTRR_WRITE_COMBINING	0x01
 #define	MTRR_WRITE_THROUGH	0x04
 #define	MTRR_WRITE_PROTECTED	0x05
 #define	MTRR_WRITE_BACK		0x06
 #define	MTRR_N64K		8	/* numbers of fixed-size entries */
 #define	MTRR_N16K		16
 #define	MTRR_N4K		64
 #define	MTRR_CAP_WC		0x0000000000000400
 #define	MTRR_CAP_FIXED		0x0000000000000100
 #define	MTRR_CAP_VCNT		0x00000000000000ff
 #define	MTRR_DEF_ENABLE		0x0000000000000800
 #define	MTRR_DEF_FIXED_ENABLE	0x0000000000000400
 #define	MTRR_DEF_TYPE		0x00000000000000ff
 #define	MTRR_PHYSBASE_PHYSBASE	0x000ffffffffff000
 #define	MTRR_PHYSBASE_TYPE	0x00000000000000ff
 #define	MTRR_PHYSMASK_PHYSMASK	0x000ffffffffff000
 #define	MTRR_PHYSMASK_VALID	0x0000000000000800
 
 /*
  * Cyrix configuration registers, accessible as IO ports.
  */
 #define	CCR0			0xc0	/* Configuration control register 0 */
 #define	CCR0_NC0		0x01	/* First 64K of each 1M memory region is
 								   non-cacheable */
 #define	CCR0_NC1		0x02	/* 640K-1M region is non-cacheable */
 #define	CCR0_A20M		0x04	/* Enables A20M# input pin */
 #define	CCR0_KEN		0x08	/* Enables KEN# input pin */
 #define	CCR0_FLUSH		0x10	/* Enables FLUSH# input pin */
 #define	CCR0_BARB		0x20	/* Flushes internal cache when entering hold
 								   state */
 #define	CCR0_CO			0x40	/* Cache org: 1=direct mapped, 0=2x set
 								   assoc */
 #define	CCR0_SUSPEND	0x80	/* Enables SUSP# and SUSPA# pins */
 
 #define	CCR1			0xc1	/* Configuration control register 1 */
 #define	CCR1_RPL		0x01	/* Enables RPLSET and RPLVAL# pins */
 #define	CCR1_SMI		0x02	/* Enables SMM pins */
 #define	CCR1_SMAC		0x04	/* System management memory access */
 #define	CCR1_MMAC		0x08	/* Main memory access */
 #define	CCR1_NO_LOCK	0x10	/* Negate LOCK# */
 #define	CCR1_SM3		0x80	/* SMM address space address region 3 */
 
 #define	CCR2			0xc2
 #define	CCR2_WB			0x02	/* Enables WB cache interface pins */
 #define	CCR2_SADS		0x02	/* Slow ADS */
 #define	CCR2_LOCK_NW	0x04	/* LOCK NW Bit */
 #define	CCR2_SUSP_HLT	0x08	/* Suspend on HALT */
 #define	CCR2_WT1		0x10	/* WT region 1 */
 #define	CCR2_WPR1		0x10	/* Write-protect region 1 */
 #define	CCR2_BARB		0x20	/* Flushes write-back cache when entering
 								   hold state. */
 #define	CCR2_BWRT		0x40	/* Enables burst write cycles */
 #define	CCR2_USE_SUSP	0x80	/* Enables suspend pins */
 
 #define	CCR3			0xc3
 #define	CCR3_SMILOCK	0x01	/* SMM register lock */
 #define	CCR3_NMI		0x02	/* Enables NMI during SMM */
 #define	CCR3_LINBRST	0x04	/* Linear address burst cycles */
 #define	CCR3_SMMMODE	0x08	/* SMM Mode */
 #define	CCR3_MAPEN0		0x10	/* Enables Map0 */
 #define	CCR3_MAPEN1		0x20	/* Enables Map1 */
 #define	CCR3_MAPEN2		0x40	/* Enables Map2 */
 #define	CCR3_MAPEN3		0x80	/* Enables Map3 */
 
 #define	CCR4			0xe8	/* Configuration control register 4 */
 #define	CCR4_IOMASK		0x07
 #define	CCR4_MEM		0x08	/* Enables memory bypassing */
 #define	CCR4_DTE		0x10	/* Enables directory table entry cache */
 #define	CCR4_FASTFPE	0x20	/* Fast FPU exception */
 #define	CCR4_CPUID		0x80	/* Enables CPUID instruction */
 
 #define	CCR5			0xe9
 #define	CCR5_WT_ALLOC	0x01	/* Write-through allocate */
 #define	CCR5_SLOP		0x02	/* LOOP instruction slowed down */
 #define	CCR5_LBR1		0x10	/* Local bus region 1 */
 #define	CCR5_ARREN		0x20	/* Enables ARR region */
 
 #define	CCR6			0xea
 
 #define	CCR7			0xeb
 
 /* Performance Control Register (5x86 only). */
 #define	PCR0			0x20
 #define	PCR0_RSTK		0x01	/* Enables return stack */
 #define	PCR0_BTB		0x02	/* Enables branch target buffer */
 #define	PCR0_LOOP		0x04	/* Enables loop */
 #define	PCR0_AIS		0x08	/* Enables all instructions stalled to
 								   serialize pipe. */
 #define	PCR0_MLR		0x10	/* Enables reordering of misaligned loads */
 #define	PCR0_BTBRT		0x40	/* Enables BTB test register. */
 #define	PCR0_LSSER		0x80	/* Disable reorder */
 
 /* Device Identification Registers */
 #define	DIR0			0xfe
 #define	DIR1			0xff
 
 /*
  * Machine Check register constants.
  */
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
 #define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
 #define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
 #define	MCG_CTL_ENABLE		0xffffffffffffffff
 #define	MCG_CTL_DISABLE		0x0000000000000000
 #define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
 #define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffff
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
 #define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_PCC		0x0200000000000000
 #define	MC_STATUS_ADDRV		0x0400000000000000
 #define	MC_STATUS_MISCV		0x0800000000000000
 #define	MC_STATUS_EN		0x1000000000000000
 #define	MC_STATUS_UC		0x2000000000000000
 #define	MC_STATUS_OVER		0x4000000000000000
 #define	MC_STATUS_VAL		0x8000000000000000
 #define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
 #define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
 #define	MC_CTL2_THRESHOLD	0x0000000000007fff
 #define	MC_CTL2_CMCI_EN		0x0000000040000000
 #define	MC_AMDNB_BANK		4
 #define	MC_MISC_AMD_VAL		0x8000000000000000	/* Counter presence valid */
 #define	MC_MISC_AMD_CNTP	0x4000000000000000	/* Counter present */
 #define	MC_MISC_AMD_LOCK	0x2000000000000000	/* Register locked */
 #define	MC_MISC_AMD_INTP	0x1000000000000000	/* Int. type can generate interrupts */
 #define	MC_MISC_AMD_LVT_MASK	0x00f0000000000000	/* Extended LVT offset */
 #define	MC_MISC_AMD_LVT_SHIFT	52
 #define	MC_MISC_AMD_CNTEN	0x0008000000000000	/* Counter enabled */
 #define	MC_MISC_AMD_INT_MASK	0x0006000000000000	/* Interrupt type */
 #define	MC_MISC_AMD_INT_LVT	0x0002000000000000	/* Interrupt via Extended LVT */
 #define	MC_MISC_AMD_INT_SMI	0x0004000000000000	/* SMI */
 #define	MC_MISC_AMD_OVERFLOW	0x0001000000000000	/* Counter overflow */
 #define	MC_MISC_AMD_CNT_MASK	0x00000fff00000000	/* Counter value */
 #define	MC_MISC_AMD_CNT_SHIFT	32
 #define	MC_MISC_AMD_CNT_MAX	0xfff
 #define	MC_MISC_AMD_PTR_MASK	0x00000000ff000000	/* Pointer to additional registers */
 #define	MC_MISC_AMD_PTR_SHIFT	24
 
 /* AMD Scalable MCA */
 #define MSR_SMCA_MC0_CTL          0xc0002000
 #define MSR_SMCA_MC0_STATUS       0xc0002001
 #define MSR_SMCA_MC0_ADDR         0xc0002002
 #define MSR_SMCA_MC0_MISC0        0xc0002003
 #define MSR_SMCA_MC_CTL(x)       (MSR_SMCA_MC0_CTL + 0x10 * (x))
 #define MSR_SMCA_MC_STATUS(x)    (MSR_SMCA_MC0_STATUS + 0x10 * (x))
 #define MSR_SMCA_MC_ADDR(x)      (MSR_SMCA_MC0_ADDR + 0x10 * (x))
 #define MSR_SMCA_MC_MISC(x)      (MSR_SMCA_MC0_MISC0 + 0x10 * (x))
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.
  * These registers must be written as three separate bytes.
  *
  * NCRx+0: A31-A24 of starting address
  * NCRx+1: A23-A16 of starting address
  * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
  *
  * The non-cacheable region's starting address must be aligned to the
  * size indicated by the NCR_SIZE_xx field.
  */
 #define	NCR1	0xc4
 #define	NCR2	0xc7
 #define	NCR3	0xca
 #define	NCR4	0xcd
 
 #define	NCR_SIZE_0K	0
 #define	NCR_SIZE_4K	1
 #define	NCR_SIZE_8K	2
 #define	NCR_SIZE_16K	3
 #define	NCR_SIZE_32K	4
 #define	NCR_SIZE_64K	5
 #define	NCR_SIZE_128K	6
 #define	NCR_SIZE_256K	7
 #define	NCR_SIZE_512K	8
 #define	NCR_SIZE_1M	9
 #define	NCR_SIZE_2M	10
 #define	NCR_SIZE_4M	11
 #define	NCR_SIZE_8M	12
 #define	NCR_SIZE_16M	13
 #define	NCR_SIZE_32M	14
 #define	NCR_SIZE_4G	15
 
 /*
  * The address region registers are used to specify the location and
  * size for the eight address regions.
  *
  * ARRx + 0: A31-A24 of start address
  * ARRx + 1: A23-A16 of start address
  * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
  */
 #define	ARR0	0xc4
 #define	ARR1	0xc7
 #define	ARR2	0xca
 #define	ARR3	0xcd
 #define	ARR4	0xd0
 #define	ARR5	0xd3
 #define	ARR6	0xd6
 #define	ARR7	0xd9
 
 #define	ARR_SIZE_0K		0
 #define	ARR_SIZE_4K		1
 #define	ARR_SIZE_8K		2
 #define	ARR_SIZE_16K	3
 #define	ARR_SIZE_32K	4
 #define	ARR_SIZE_64K	5
 #define	ARR_SIZE_128K	6
 #define	ARR_SIZE_256K	7
 #define	ARR_SIZE_512K	8
 #define	ARR_SIZE_1M		9
 #define	ARR_SIZE_2M		10
 #define	ARR_SIZE_4M		11
 #define	ARR_SIZE_8M		12
 #define	ARR_SIZE_16M	13
 #define	ARR_SIZE_32M	14
 #define	ARR_SIZE_4G		15
 
 /*
  * The region control registers specify the attributes associated with
  * the ARRx address regions.
  */
 #define	RCR0	0xdc
 #define	RCR1	0xdd
 #define	RCR2	0xde
 #define	RCR3	0xdf
 #define	RCR4	0xe0
 #define	RCR5	0xe1
 #define	RCR6	0xe2
 #define	RCR7	0xe3
 
 #define	RCR_RCD	0x01	/* Disables caching for ARRx (x = 0-6). */
 #define	RCR_RCE	0x01	/* Enables caching for ARR7. */
 #define	RCR_WWO	0x02	/* Weak write ordering. */
 #define	RCR_WL	0x04	/* Weak locking. */
 #define	RCR_WG	0x08	/* Write gathering. */
 #define	RCR_WT	0x10	/* Write-through. */
 #define	RCR_NLB	0x20	/* LBA# pin is not asserted. */
 
 /* AMD Write Allocate Top-Of-Memory and Control Register */
 #define	AMD_WT_ALLOC_TME	0x40000	/* top-of-memory enable */
 #define	AMD_WT_ALLOC_PRE	0x20000	/* programmable range enable */
 #define	AMD_WT_ALLOC_FRE	0x10000	/* fixed (A0000-FFFFF) range enable */
 
 /* AMD64 MSR's */
 #define	MSR_EFER	0xc0000080	/* extended features */
 #define	MSR_STAR	0xc0000081	/* legacy mode SYSCALL target/cs/ss */
 #define	MSR_LSTAR	0xc0000082	/* long mode SYSCALL target rip */
 #define	MSR_CSTAR	0xc0000083	/* compat mode SYSCALL target rip */
 #define	MSR_SF_MASK	0xc0000084	/* syscall flags mask */
 #define	MSR_FSBASE	0xc0000100	/* base address of the %fs "segment" */
 #define	MSR_GSBASE	0xc0000101	/* base address of the %gs "segment" */
 #define	MSR_KGSBASE	0xc0000102	/* base address of the kernel %gs */
 #define	MSR_TSC_AUX	0xc0000103
 #define	MSR_PERFEVSEL0	0xc0010000
 #define	MSR_PERFEVSEL1	0xc0010001
 #define	MSR_PERFEVSEL2	0xc0010002
 #define	MSR_PERFEVSEL3	0xc0010003
 #define	MSR_K7_PERFCTR0	0xc0010004
 #define	MSR_K7_PERFCTR1	0xc0010005
 #define	MSR_K7_PERFCTR2	0xc0010006
 #define	MSR_K7_PERFCTR3	0xc0010007
 #define	MSR_SYSCFG	0xc0010010
 #define	MSR_HWCR	0xc0010015
 #define	MSR_IORRBASE0	0xc0010016
 #define	MSR_IORRMASK0	0xc0010017
 #define	MSR_IORRBASE1	0xc0010018
 #define	MSR_IORRMASK1	0xc0010019
 #define	MSR_TOP_MEM	0xc001001a	/* boundary for ram below 4G */
 #define	MSR_TOP_MEM2	0xc001001d	/* boundary for ram above 4G */
 #define	MSR_NB_CFG1	0xc001001f	/* NB configuration 1 */
 #define	MSR_K8_UCODE_UPDATE 0xc0010020	/* update microcode */
 #define	MSR_MC0_CTL_MASK 0xc0010044
 #define	MSR_P_STATE_LIMIT 0xc0010061	/* P-state Current Limit Register */
 #define	MSR_P_STATE_CONTROL 0xc0010062	/* P-state Control Register */
 #define	MSR_P_STATE_STATUS 0xc0010063	/* P-state Status Register */
 #define	MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */
 #define	MSR_SMM_ADDR	0xc0010112	/* SMM TSEG base address */
 #define	MSR_SMM_MASK	0xc0010113	/* SMM TSEG address mask */
 #define	MSR_VM_CR	0xc0010114	/* SVM: feature control */
 #define	MSR_VM_HSAVE_PA 0xc0010117	/* SVM: host save area address */
 #define	MSR_AMD_CPUID07	0xc0011002	/* CPUID 07 %ebx override */
 #define	MSR_EXTFEATURES	0xc0011005	/* Extended CPUID Features override */
 #define	MSR_LS_CFG	0xc0011020
 #define	MSR_IC_CFG	0xc0011021	/* Instruction Cache Configuration */
 
 /* MSR_VM_CR related */
 #define	VM_CR_SVMDIS		0x10	/* SVM: disabled by BIOS */
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG		1	/* cpu has RNG */
 
 /* VIA ACE crypto featureset: for via_feature_xcrypt */
 #define	VIA_HAS_AES		1	/* cpu has AES */
 #define	VIA_HAS_SHA		2	/* cpu has SHA1 & SHA256 */
 #define	VIA_HAS_MM		4	/* cpu has RSA instructions */
 #define	VIA_HAS_AESCTR		8	/* cpu has AES-CTR instructions */
 
 /* Centaur Extended Feature flags */
 #define	VIA_CPUID_HAS_RNG	0x000004
 #define	VIA_CPUID_DO_RNG	0x000008
 #define	VIA_CPUID_HAS_ACE	0x000040
 #define	VIA_CPUID_DO_ACE	0x000080
 #define	VIA_CPUID_HAS_ACE2	0x000100
 #define	VIA_CPUID_DO_ACE2	0x000200
 #define	VIA_CPUID_HAS_PHE	0x000400
 #define	VIA_CPUID_DO_PHE	0x000800
 #define	VIA_CPUID_HAS_PMM	0x001000
 #define	VIA_CPUID_DO_PMM	0x002000
 
 /* VIA ACE xcrypt-* instruction context control options */
 #define	VIA_CRYPT_CWLO_ROUND_M		0x0000000f	/* round count field */
 #define	VIA_CRYPT_CWLO_ALG_M		0x00000070
 #define	VIA_CRYPT_CWLO_ALG_AES		0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_M		0x00000080
 #define	VIA_CRYPT_CWLO_KEYGEN_HW	0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_SW	0x00000080
 #define	VIA_CRYPT_CWLO_NORMAL		0x00000000
 #define	VIA_CRYPT_CWLO_INTERMEDIATE	0x00000100
 #define	VIA_CRYPT_CWLO_ENCRYPT		0x00000000
 #define	VIA_CRYPT_CWLO_DECRYPT		0x00000200
 #define	VIA_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
 #define	VIA_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
 #define	VIA_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
 
 #endif /* !_MACHINE_SPECIALREG_H_ */
Index: head/sys/x86/x86/identcpu.c
===================================================================
--- head/sys/x86/x86/identcpu.c	(revision 356939)
+++ head/sys/x86/x86/identcpu.c	(revision 356940)
@@ -1,2651 +1,2670 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * Copyright (c) 1997 KATO Takenori.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <sys/limits.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/power.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <machine/asmacros.h>
 #include <machine/clock.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
 #include <machine/md_var.h>
 #include <machine/segments.h>
 #include <machine/specialreg.h>
 
 #include <amd64/vmm/intel/vmx_controls.h>
 #include <x86/isa/icu.h>
 #include <x86/vmware.h>
 
 #ifdef __i386__
 #define	IDENTBLUE_CYRIX486	0
 #define	IDENTBLUE_IBMCPU	1
 #define	IDENTBLUE_CYRIXM2	2
 
 static void identifycyrix(void);
 static void print_transmeta_info(void);
 #endif
 static u_int find_cpu_vendor_id(void);
 static void print_AMD_info(void);
 static void print_INTEL_info(void);
 static void print_INTEL_TLB(u_int data);
 static void print_hypervisor_info(void);
 static void print_svm_info(void);
 static void print_via_padlock_info(void);
 static void print_vmx_info(void);
 
 #ifdef __i386__
 int	cpu;			/* Are we 386, 386sx, 486, etc? */
 int	cpu_class;		/* CPUCLASS_* taken from cpus[cpu] */
 #endif
 u_int	cpu_feature;		/* Feature flags */
 u_int	cpu_feature2;		/* Feature flags (printed as Features2) */
 u_int	amd_feature;		/* AMD feature flags */
 u_int	amd_feature2;		/* AMD feature flags */
 u_int	amd_rascap;		/* AMD RAS capabilities */
 u_int	amd_pminfo;		/* AMD advanced power management info */
 u_int	amd_extended_feature_extensions;
 u_int	via_feature_rng;	/* VIA RNG features */
 u_int	via_feature_xcrypt;	/* VIA ACE features */
 u_int	cpu_high;		/* Highest arg to CPUID */
 u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
 u_int	cpu_id;			/* CPUID signature: family/model/stepping */
 u_int	cpu_procinfo;		/* HyperThreading Info / Brand Index / CLFLUSH */
 u_int	cpu_procinfo2;		/* Multicore info */
 char	cpu_vendor[20];		/* CPU Origin code */
 u_int	cpu_vendor_id;		/* CPU vendor ID */
 u_int	cpu_fxsr;		/* SSE enabled */
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;
 u_int	cpu_stdext_feature;	/* %ebx */
 u_int	cpu_stdext_feature2;	/* %ecx */
 u_int	cpu_stdext_feature3;	/* %edx */
 uint64_t cpu_ia32_arch_caps;
 u_int	cpu_max_ext_state_size;
 u_int	cpu_mon_mwait_flags;	/* MONITOR/MWAIT flags (CPUID.05H.ECX) */
 u_int	cpu_mon_min_size;	/* MONITOR minimum range size, bytes */
 u_int	cpu_mon_max_size;	/* MONITOR maximum range size, bytes */
 u_int	cpu_maxphyaddr;		/* Max phys addr width in bits */
 u_int	cpu_power_eax;		/* 06H: Power management leaf, %eax */
 u_int	cpu_power_ebx;		/* 06H: Power management leaf, %ebx */
 u_int	cpu_power_ecx;		/* 06H: Power management leaf, %ecx */
 u_int	cpu_power_edx;		/* 06H: Power management leaf, %edx */
 char machine[] = MACHINE;
 
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
     &via_feature_rng, 0,
     "VIA RNG feature available in CPU");
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
     &via_feature_xcrypt, 0,
     "VIA xcrypt feature available in CPU");
 
 #ifdef __amd64__
 #ifdef SCTL_MASK32
 extern int adaptive_machine_arch;
 #endif
 
 static int
 sysctl_hw_machine(SYSCTL_HANDLER_ARGS)
 {
 #ifdef SCTL_MASK32
 	static const char machine32[] = "i386";	/* alternate name, see test below */
 #endif
 	int error;
 	/* hw.machine handler: copies the machine class string out to req. */
 #ifdef SCTL_MASK32
 	if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch)
 		error = SYSCTL_OUT(req, machine32, sizeof(machine32));
 	else		/* the statement after #endif is this else's body */
 #endif
 		error = SYSCTL_OUT(req, machine, sizeof(machine));
 	return (error);	/* result of whichever SYSCTL_OUT() ran */
 
 }
 SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class");
 #else
 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD,
     machine, 0, "Machine class");
 #endif
 
 static char cpu_model[128];
 SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE,
     cpu_model, 0, "Machine model");
 
 static int hw_clockrate;
 SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD,
     &hw_clockrate, 0, "CPU instruction clock rate");
 
 u_int hv_base;
 u_int hv_high;
 char hv_vendor[16];
 SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD | CTLFLAG_MPSAFE, hv_vendor,
     0, "Hypervisor vendor");
 
 static eventhandler_tag tsc_post_tag;
 
 static char cpu_brand[48];
 
 #ifdef __i386__
 #define	MAX_BRAND_INDEX	8	/* highest valid cpu_brandtable index */
 /* Names looked up by (cpu_procinfo & CPUID_BRAND_INDEX); NULL = no name. */
 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = {
 	NULL,			/* No brand */
 	"Intel Celeron",	/* 1 */
 	"Intel Pentium III",	/* 2 */
 	"Intel Pentium III Xeon",	/* 3 */
 	NULL,			/* 4 */
 	NULL,			/* 5 */
 	NULL,			/* 6 */
 	NULL,			/* 7 */
 	"Intel Pentium 4"	/* 8 == MAX_BRAND_INDEX */
 };
 
 static struct {
 	char	*cpu_name;	/* initial cpu_model string */
 	int	cpu_class;	/* CPUCLASS_* value */
 } cpus[] = {			/* indexed by 'cpu'; entry order is per-row CPU_* */
 	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
 	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
 	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
 	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
 	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
 	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
 	{ "Cyrix 486",		CPUCLASS_486 },		/* CPU_486DLC */
 	{ "Pentium Pro",	CPUCLASS_686 },		/* CPU_686 */
 	{ "Cyrix 5x86",		CPUCLASS_486 },		/* CPU_M1SC */
 	{ "Cyrix 6x86",		CPUCLASS_486 },		/* CPU_M1 */
 	{ "Blue Lightning",	CPUCLASS_486 },		/* CPU_BLUE */
 	{ "Cyrix 6x86MX",	CPUCLASS_686 },		/* CPU_M2 */
 	{ "NexGen 586",		CPUCLASS_386 },		/* CPU_NX586 (XXX) */
 	{ "Cyrix 486S/DX",	CPUCLASS_486 },		/* CPU_CY486DX */
 	{ "Pentium II",		CPUCLASS_686 },		/* CPU_PII */
 	{ "Pentium III",	CPUCLASS_686 },		/* CPU_PIII */
 	{ "Pentium 4",		CPUCLASS_686 },		/* CPU_P4 */
 };
 #endif
 #endif
 
 static struct {
 	char	*vendor;	/* vendor string (*_VENDOR_ID macro) */
 	u_int	vendor_id;	/* matching CPU_VENDOR_* value */
 } cpu_vendors[] = {
 	{ INTEL_VENDOR_ID,	CPU_VENDOR_INTEL },	/* GenuineIntel */
 	{ AMD_VENDOR_ID,	CPU_VENDOR_AMD },	/* AuthenticAMD */
+	{ HYGON_VENDOR_ID,	CPU_VENDOR_HYGON },	/* HygonGenuine */
 	{ CENTAUR_VENDOR_ID,	CPU_VENDOR_CENTAUR },	/* CentaurHauls */
 #ifdef __i386__
 	{ NSC_VENDOR_ID,	CPU_VENDOR_NSC },	/* Geode by NSC */
 	{ CYRIX_VENDOR_ID,	CPU_VENDOR_CYRIX },	/* CyrixInstead */
 	{ TRANSMETA_VENDOR_ID,	CPU_VENDOR_TRANSMETA },	/* GenuineTMx86 */
 	{ SIS_VENDOR_ID,	CPU_VENDOR_SIS },	/* SiS SiS SiS  */
 	{ UMC_VENDOR_ID,	CPU_VENDOR_UMC },	/* UMC UMC UMC  */
 	{ NEXGEN_VENDOR_ID,	CPU_VENDOR_NEXGEN },	/* NexGenDriven */
 	{ RISE_VENDOR_ID,	CPU_VENDOR_RISE },	/* RiseRiseRise */
 #if 0
 	/* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */
 	{ "TransmetaCPU",	CPU_VENDOR_TRANSMETA },
 #endif
 #endif
 };
 
 void
 printcpuinfo(void)
 {
 	u_int regs[4], i;
 	char *brand;
 
 	printf("CPU: ");
 #ifdef __i386__
 	cpu_class = cpus[cpu].cpu_class;
 	strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model));
 #else
 	strncpy(cpu_model, "Hammer", sizeof (cpu_model));
 #endif
 
 	/* Check for extended CPUID information and a processor name. */
 	if (cpu_exthigh >= 0x80000004) {
 		brand = cpu_brand;
 		for (i = 0x80000002; i < 0x80000005; i++) {
 			do_cpuid(i, regs);
 			memcpy(brand, regs, sizeof(regs));
 			brand += sizeof(regs);
 		}
 	}
 
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_INTEL:
 #ifdef __i386__
 		if ((cpu_id & 0xf00) > 0x300) {
 			u_int brand_index;
 
 			cpu_model[0] = '\0';
 
 			switch (cpu_id & 0x3000) {
 			case 0x1000:
 				strcpy(cpu_model, "Overdrive ");
 				break;
 			case 0x2000:
 				strcpy(cpu_model, "Dual ");
 				break;
 			}
 
 			switch (cpu_id & 0xf00) {
 			case 0x400:
 				strcat(cpu_model, "i486 ");
 			        /* Check the particular flavor of 486 */
 				switch (cpu_id & 0xf0) {
 				case 0x00:
 				case 0x10:
 					strcat(cpu_model, "DX");
 					break;
 				case 0x20:
 					strcat(cpu_model, "SX");
 					break;
 				case 0x30:
 					strcat(cpu_model, "DX2");
 					break;
 				case 0x40:
 					strcat(cpu_model, "SL");
 					break;
 				case 0x50:
 					strcat(cpu_model, "SX2");
 					break;
 				case 0x70:
 					strcat(cpu_model,
 					    "DX2 Write-Back Enhanced");
 					break;
 				case 0x80:
 					strcat(cpu_model, "DX4");
 					break;
 				}
 				break;
 			case 0x500:
 			        /* Check the particular flavor of 586 */
 			        strcat(cpu_model, "Pentium");
 			        switch (cpu_id & 0xf0) {
 				case 0x00:
 				        strcat(cpu_model, " A-step");
 					break;
 				case 0x10:
 				        strcat(cpu_model, "/P5");
 					break;
 				case 0x20:
 				        strcat(cpu_model, "/P54C");
 					break;
 				case 0x30:
 				        strcat(cpu_model, "/P24T");
 					break;
 				case 0x40:
 				        strcat(cpu_model, "/P55C");
 					break;
 				case 0x70:
 				        strcat(cpu_model, "/P54C");
 					break;
 				case 0x80:
 				        strcat(cpu_model, "/P55C (quarter-micron)");
 					break;
 				default:
 				        /* nothing */
 					break;
 				}
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 				/*
 				 * XXX - If/when Intel fixes the bug, this
 				 * should also check the version of the
 				 * CPU, not just that it's a Pentium.
 				 */
 				has_f00f_bug = 1;
 #endif
 				break;
 			case 0x600:
 			        /* Check the particular flavor of 686 */
   			        switch (cpu_id & 0xf0) {
 				case 0x00:
 				        strcat(cpu_model, "Pentium Pro A-step");
 					break;
 				case 0x10:
 				        strcat(cpu_model, "Pentium Pro");
 					break;
 				case 0x30:
 				case 0x50:
 				case 0x60:
 				        strcat(cpu_model,
 				"Pentium II/Pentium II Xeon/Celeron");
 					cpu = CPU_PII;
 					break;
 				case 0x70:
 				case 0x80:
 				case 0xa0:
 				case 0xb0:
 				        strcat(cpu_model,
 					"Pentium III/Pentium III Xeon/Celeron");
 					cpu = CPU_PIII;
 					break;
 				default:
 				        strcat(cpu_model, "Unknown 80686");
 					break;
 				}
 				break;
 			case 0xf00:
 				strcat(cpu_model, "Pentium 4");
 				cpu = CPU_P4;
 				break;
 			default:
 				strcat(cpu_model, "unknown");
 				break;
 			}
 
 			/*
 			 * If we didn't get a brand name from the extended
 			 * CPUID, try to look it up in the brand table.
 			 */
 			if (cpu_high > 0 && *cpu_brand == '\0') {
 				brand_index = cpu_procinfo & CPUID_BRAND_INDEX;
 				if (brand_index <= MAX_BRAND_INDEX &&
 				    cpu_brandtable[brand_index] != NULL)
 					strcpy(cpu_brand,
 					    cpu_brandtable[brand_index]);
 			}
 		}
 #else
 		/* Please make up your mind folks! */
 		strcat(cpu_model, "EM64T");
 #endif
 		break;
 	case CPU_VENDOR_AMD:
 		/*
 		 * Values taken from AMD Processor Recognition
 		 * http://www.amd.com/K6/k6docs/pdf/20734g.pdf
 		 * (also describes ``Features'' encodings).
 		 */
 		strcpy(cpu_model, "AMD ");
 #ifdef __i386__
 		switch (cpu_id & 0xFF0) {
 		case 0x410:
 			strcat(cpu_model, "Standard Am486DX");
 			break;
 		case 0x430:
 			strcat(cpu_model, "Enhanced Am486DX2 Write-Through");
 			break;
 		case 0x470:
 			strcat(cpu_model, "Enhanced Am486DX2 Write-Back");
 			break;
 		case 0x480:
 			strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through");
 			break;
 		case 0x490:
 			strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back");
 			break;
 		case 0x4E0:
 			strcat(cpu_model, "Am5x86 Write-Through");
 			break;
 		case 0x4F0:
 			strcat(cpu_model, "Am5x86 Write-Back");
 			break;
 		case 0x500:
 			strcat(cpu_model, "K5 model 0");
 			break;
 		case 0x510:
 			strcat(cpu_model, "K5 model 1");
 			break;
 		case 0x520:
 			strcat(cpu_model, "K5 PR166 (model 2)");
 			break;
 		case 0x530:
 			strcat(cpu_model, "K5 PR200 (model 3)");
 			break;
 		case 0x560:
 			strcat(cpu_model, "K6");
 			break;
 		case 0x570:
 			strcat(cpu_model, "K6 266 (model 1)");
 			break;
 		case 0x580:
 			strcat(cpu_model, "K6-2");
 			break;
 		case 0x590:
 			strcat(cpu_model, "K6-III");
 			break;
 		case 0x5a0:
 			strcat(cpu_model, "Geode LX");
 			break;
 		default:
 			strcat(cpu_model, "Unknown");
 			break;
 		}
 #else
 		if ((cpu_id & 0xf00) == 0xf00)
 			strcat(cpu_model, "AMD64 Processor");
 		else
 			strcat(cpu_model, "Unknown");
 #endif
 		break;
 #ifdef __i386__
 	case CPU_VENDOR_CYRIX:
 		strcpy(cpu_model, "Cyrix ");
 		switch (cpu_id & 0xff0) {
 		case 0x440:
 			strcat(cpu_model, "MediaGX");
 			break;
 		case 0x520:
 			strcat(cpu_model, "6x86");
 			break;
 		case 0x540:
 			cpu_class = CPUCLASS_586;
 			strcat(cpu_model, "GXm");
 			break;
 		case 0x600:
 			strcat(cpu_model, "6x86MX");
 			break;
 		default:
 			/*
 			 * Even though CPU supports the cpuid
 			 * instruction, it can be disabled.
 			 * Therefore, this routine supports all Cyrix
 			 * CPUs.
 			 */
 			switch (cyrix_did & 0xf0) {
 			case 0x00:
 				switch (cyrix_did & 0x0f) {
 				case 0x00:
 					strcat(cpu_model, "486SLC");
 					break;
 				case 0x01:
 					strcat(cpu_model, "486DLC");
 					break;
 				case 0x02:
 					strcat(cpu_model, "486SLC2");
 					break;
 				case 0x03:
 					strcat(cpu_model, "486DLC2");
 					break;
 				case 0x04:
 					strcat(cpu_model, "486SRx");
 					break;
 				case 0x05:
 					strcat(cpu_model, "486DRx");
 					break;
 				case 0x06:
 					strcat(cpu_model, "486SRx2");
 					break;
 				case 0x07:
 					strcat(cpu_model, "486DRx2");
 					break;
 				case 0x08:
 					strcat(cpu_model, "486SRu");
 					break;
 				case 0x09:
 					strcat(cpu_model, "486DRu");
 					break;
 				case 0x0a:
 					strcat(cpu_model, "486SRu2");
 					break;
 				case 0x0b:
 					strcat(cpu_model, "486DRu2");
 					break;
 				default:
 					strcat(cpu_model, "Unknown");
 					break;
 				}
 				break;
 			case 0x10:
 				switch (cyrix_did & 0x0f) {
 				case 0x00:
 					strcat(cpu_model, "486S");
 					break;
 				case 0x01:
 					strcat(cpu_model, "486S2");
 					break;
 				case 0x02:
 					strcat(cpu_model, "486Se");
 					break;
 				case 0x03:
 					strcat(cpu_model, "486S2e");
 					break;
 				case 0x0a:
 					strcat(cpu_model, "486DX");
 					break;
 				case 0x0b:
 					strcat(cpu_model, "486DX2");
 					break;
 				case 0x0f:
 					strcat(cpu_model, "486DX4");
 					break;
 				default:
 					strcat(cpu_model, "Unknown");
 					break;
 				}
 				break;
 			case 0x20:
 				if ((cyrix_did & 0x0f) < 8)
 					strcat(cpu_model, "6x86");	/* Where did you get it? */
 				else
 					strcat(cpu_model, "5x86");
 				break;
 			case 0x30:
 				strcat(cpu_model, "6x86");
 				break;
 			case 0x40:
 				if ((cyrix_did & 0xf000) == 0x3000) {
 					cpu_class = CPUCLASS_586;
 					strcat(cpu_model, "GXm");
 				} else
 					strcat(cpu_model, "MediaGX");
 				break;
 			case 0x50:
 				strcat(cpu_model, "6x86MX");
 				break;
 			case 0xf0:
 				switch (cyrix_did & 0x0f) {
 				case 0x0d:
 					strcat(cpu_model, "Overdrive CPU");
 					break;
 				case 0x0e:
 					strcpy(cpu_model, "Texas Instruments 486SXL");
 					break;
 				case 0x0f:
 					strcat(cpu_model, "486SLC/DLC");
 					break;
 				default:
 					strcat(cpu_model, "Unknown");
 					break;
 				}
 				break;
 			default:
 				strcat(cpu_model, "Unknown");
 				break;
 			}
 			break;
 		}
 		break;
 	case CPU_VENDOR_RISE:
 		strcpy(cpu_model, "Rise ");
 		switch (cpu_id & 0xff0) {
 		case 0x500:	/* 6401 and 6441 (Kirin) */
 		case 0x520:	/* 6510 (Lynx) */
 			strcat(cpu_model, "mP6");
 			break;
 		default:
 			strcat(cpu_model, "Unknown");
 		}
 		break;
 #endif
 	case CPU_VENDOR_CENTAUR:
 #ifdef __i386__
 		switch (cpu_id & 0xff0) {
 		case 0x540:
 			strcpy(cpu_model, "IDT WinChip C6");
 			break;
 		case 0x580:
 			strcpy(cpu_model, "IDT WinChip 2");
 			break;
 		case 0x590:
 			strcpy(cpu_model, "IDT WinChip 3");
 			break;
 		case 0x660:
 			strcpy(cpu_model, "VIA C3 Samuel");
 			break;
 		case 0x670:
 			if (cpu_id & 0x8)
 				strcpy(cpu_model, "VIA C3 Ezra");
 			else
 				strcpy(cpu_model, "VIA C3 Samuel 2");
 			break;
 		case 0x680:
 			strcpy(cpu_model, "VIA C3 Ezra-T");
 			break;
 		case 0x690:
 			strcpy(cpu_model, "VIA C3 Nehemiah");
 			break;
 		case 0x6a0:
 		case 0x6d0:
 			strcpy(cpu_model, "VIA C7 Esther");
 			break;
 		case 0x6f0:
 			strcpy(cpu_model, "VIA Nano");
 			break;
 		default:
 			strcpy(cpu_model, "VIA/IDT Unknown");
 		}
 #else
 		strcpy(cpu_model, "VIA ");
 		if ((cpu_id & 0xff0) == 0x6f0)
 			strcat(cpu_model, "Nano Processor");
 		else
 			strcat(cpu_model, "Unknown");
 #endif
 		break;
 #ifdef __i386__
 	case CPU_VENDOR_IBM:
 		strcpy(cpu_model, "Blue Lightning CPU");
 		break;
 	case CPU_VENDOR_NSC:
 		switch (cpu_id & 0xff0) {
 		case 0x540:
 			strcpy(cpu_model, "Geode SC1100");
 			cpu = CPU_GEODE1100;
 			break;
 		default:
 			strcpy(cpu_model, "Geode/NSC unknown");
 			break;
 		}
 		break;
 #endif
+	case CPU_VENDOR_HYGON:
+		strcpy(cpu_model, "Hygon ");
+#ifdef __i386__
+		strcat(cpu_model, "Unknown");
+#else
+		if ((cpu_id & 0xf00) == 0xf00)
+			strcat(cpu_model, "AMD64 Processor");
+		else
+			strcat(cpu_model, "Unknown");
+#endif
+		break;
+
 	default:
 		strcat(cpu_model, "Unknown");
 		break;
 	}
 
 	/*
 	 * Replace cpu_model with cpu_brand minus leading spaces if
 	 * we have one.
 	 */
 	brand = cpu_brand;
 	while (*brand == ' ')
 		++brand;
 	if (*brand != '\0')
 		strcpy(cpu_model, brand);
 
 	printf("%s (", cpu_model);
 	if (tsc_freq != 0) {
 		hw_clockrate = (tsc_freq + 5000) / 1000000;
 		printf("%jd.%02d-MHz ",
 		    (intmax_t)(tsc_freq + 4999) / 1000000,
 		    (u_int)((tsc_freq + 4999) / 10000) % 100);
 	}
 #ifdef __i386__
 	switch(cpu_class) {
 	case CPUCLASS_286:
 		printf("286");
 		break;
 	case CPUCLASS_386:
 		printf("386");
 		break;
 #if defined(I486_CPU)
 	case CPUCLASS_486:
 		printf("486");
 		break;
 #endif
 #if defined(I586_CPU)
 	case CPUCLASS_586:
 		printf("586");
 		break;
 #endif
 #if defined(I686_CPU)
 	case CPUCLASS_686:
 		printf("686");
 		break;
 #endif
 	default:
 		printf("Unknown");	/* will panic below... */
 	}
 #else
 	printf("K8");
 #endif
 	printf("-class CPU)\n");
 	if (*cpu_vendor)
 		printf("  Origin=\"%s\"", cpu_vendor);
 	if (cpu_id)
 		printf("  Id=0x%x", cpu_id);
 
 	if (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	    cpu_vendor_id == CPU_VENDOR_AMD ||
+	    cpu_vendor_id == CPU_VENDOR_HYGON ||
 	    cpu_vendor_id == CPU_VENDOR_CENTAUR ||
 #ifdef __i386__
 	    cpu_vendor_id == CPU_VENDOR_TRANSMETA ||
 	    cpu_vendor_id == CPU_VENDOR_RISE ||
 	    cpu_vendor_id == CPU_VENDOR_NSC ||
 	    (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) ||
 #endif
 	    0) {
 		printf("  Family=0x%x", CPUID_TO_FAMILY(cpu_id));
 		printf("  Model=0x%x", CPUID_TO_MODEL(cpu_id));
 		printf("  Stepping=%u", cpu_id & CPUID_STEPPING);
 #ifdef __i386__
 		if (cpu_vendor_id == CPU_VENDOR_CYRIX)
 			printf("\n  DIR=0x%04x", cyrix_did);
 #endif
 
 		/*
 		 * AMD CPUID Specification
 		 * http://support.amd.com/us/Embedded_TechDocs/25481.pdf
 		 *
 		 * Intel Processor Identification and CPUID Instruction
 		 * http://www.intel.com/assets/pdf/appnote/241618.pdf
 		 */
 		if (cpu_high > 0) {
 
 			/*
 			 * Here we should probably set up flags indicating
 			 * whether or not various features are available.
 			 * The interesting ones are probably VME, PSE, PAE,
 			 * and PGE.  The code already assumes without bothering
 			 * to check that all CPUs >= Pentium have a TSC and
 			 * MSRs.
 			 */
 			printf("\n  Features=0x%b", cpu_feature,
 			"\020"
 			"\001FPU"	/* Integral FPU */
 			"\002VME"	/* Extended VM86 mode support */
 			"\003DE"	/* Debugging Extensions (CR4.DE) */
 			"\004PSE"	/* 4MByte page tables */
 			"\005TSC"	/* Timestamp counter */
 			"\006MSR"	/* Machine specific registers */
 			"\007PAE"	/* Physical address extension */
 			"\010MCE"	/* Machine Check support */
 			"\011CX8"	/* CMPEXCH8 instruction */
 			"\012APIC"	/* SMP local APIC */
 			"\013oldMTRR"	/* Previous implementation of MTRR */
 			"\014SEP"	/* Fast System Call */
 			"\015MTRR"	/* Memory Type Range Registers */
 			"\016PGE"	/* PG_G (global bit) support */
 			"\017MCA"	/* Machine Check Architecture */
 			"\020CMOV"	/* CMOV instruction */
 			"\021PAT"	/* Page attributes table */
 			"\022PSE36"	/* 36 bit address space support */
 			"\023PN"	/* Processor Serial number */
 			"\024CLFLUSH"	/* Has the CLFLUSH instruction */
 			"\025<b20>"
 			"\026DTS"	/* Debug Trace Store */
 			"\027ACPI"	/* ACPI support */
 			"\030MMX"	/* MMX instructions */
 			"\031FXSR"	/* FXSAVE/FXRSTOR */
 			"\032SSE"	/* Streaming SIMD Extensions */
 			"\033SSE2"	/* Streaming SIMD Extensions #2 */
 			"\034SS"	/* Self snoop */
 			"\035HTT"	/* Hyperthreading (see EBX bit 16-23) */
 			"\036TM"	/* Thermal Monitor clock slowdown */
 			"\037IA64"	/* CPU can execute IA64 instructions */
 			"\040PBE"	/* Pending Break Enable */
 			);
 
 			if (cpu_feature2 != 0) {
 				printf("\n  Features2=0x%b", cpu_feature2,
 				"\020"
 				"\001SSE3"	/* SSE3 */
 				"\002PCLMULQDQ"	/* Carry-Less Mul Quadword */
 				"\003DTES64"	/* 64-bit Debug Trace */
 				"\004MON"	/* MONITOR/MWAIT Instructions */
 				"\005DS_CPL"	/* CPL Qualified Debug Store */
 				"\006VMX"	/* Virtual Machine Extensions */
 				"\007SMX"	/* Safer Mode Extensions */
 				"\010EST"	/* Enhanced SpeedStep */
 				"\011TM2"	/* Thermal Monitor 2 */
 				"\012SSSE3"	/* SSSE3 */
 				"\013CNXT-ID"	/* L1 context ID available */
 				"\014SDBG"	/* IA32 silicon debug */
 				"\015FMA"	/* Fused Multiply Add */
 				"\016CX16"	/* CMPXCHG16B Instruction */
 				"\017xTPR"	/* Send Task Priority Messages*/
 				"\020PDCM"	/* Perf/Debug Capability MSR */
 				"\021<b16>"
 				"\022PCID"	/* Process-context Identifiers*/
 				"\023DCA"	/* Direct Cache Access */
 				"\024SSE4.1"	/* SSE 4.1 */
 				"\025SSE4.2"	/* SSE 4.2 */
 				"\026x2APIC"	/* xAPIC Extensions */
 				"\027MOVBE"	/* MOVBE Instruction */
 				"\030POPCNT"	/* POPCNT Instruction */
 				"\031TSCDLT"	/* TSC-Deadline Timer */
 				"\032AESNI"	/* AES Crypto */
 				"\033XSAVE"	/* XSAVE/XRSTOR States */
 				"\034OSXSAVE"	/* OS-Enabled State Management*/
 				"\035AVX"	/* Advanced Vector Extensions */
 				"\036F16C"	/* Half-precision conversions */
 				"\037RDRAND"	/* RDRAND Instruction */
 				"\040HV"	/* Hypervisor */
 				);
 			}
 
 			if (amd_feature != 0) {
 				printf("\n  AMD Features=0x%b", amd_feature,
 				"\020"		/* in hex */
 				"\001<s0>"	/* Same */
 				"\002<s1>"	/* Same */
 				"\003<s2>"	/* Same */
 				"\004<s3>"	/* Same */
 				"\005<s4>"	/* Same */
 				"\006<s5>"	/* Same */
 				"\007<s6>"	/* Same */
 				"\010<s7>"	/* Same */
 				"\011<s8>"	/* Same */
 				"\012<s9>"	/* Same */
 				"\013<b10>"	/* Undefined */
 				"\014SYSCALL"	/* Have SYSCALL/SYSRET */
 				"\015<s12>"	/* Same */
 				"\016<s13>"	/* Same */
 				"\017<s14>"	/* Same */
 				"\020<s15>"	/* Same */
 				"\021<s16>"	/* Same */
 				"\022<s17>"	/* Same */
 				"\023<b18>"	/* Reserved, unknown */
 				"\024MP"	/* Multiprocessor Capable */
 				"\025NX"	/* Has EFER.NXE, NX */
 				"\026<b21>"	/* Undefined */
 				"\027MMX+"	/* AMD MMX Extensions */
 				"\030<s23>"	/* Same */
 				"\031<s24>"	/* Same */
 				"\032FFXSR"	/* Fast FXSAVE/FXRSTOR */
 				"\033Page1GB"	/* 1-GB large page support */
 				"\034RDTSCP"	/* RDTSCP */
 				"\035<b28>"	/* Undefined */
 				"\036LM"	/* 64 bit long mode */
 				"\0373DNow!+"	/* AMD 3DNow! Extensions */
 				"\0403DNow!"	/* AMD 3DNow! */
 				);
 			}
 
 			if (amd_feature2 != 0) {
 				printf("\n  AMD Features2=0x%b", amd_feature2,
 				"\020"
 				"\001LAHF"	/* LAHF/SAHF in long mode */
 				"\002CMP"	/* CMP legacy */
 				"\003SVM"	/* Secure Virtual Mode */
 				"\004ExtAPIC"	/* Extended APIC register */
 				"\005CR8"	/* CR8 in legacy mode */
 				"\006ABM"	/* LZCNT instruction */
 				"\007SSE4A"	/* SSE4A */
 				"\010MAS"	/* Misaligned SSE mode */
 				"\011Prefetch"	/* 3DNow! Prefetch/PrefetchW */
 				"\012OSVW"	/* OS visible workaround */
 				"\013IBS"	/* Instruction based sampling */
 				"\014XOP"	/* XOP extended instructions */
 				"\015SKINIT"	/* SKINIT/STGI */
 				"\016WDT"	/* Watchdog timer */
 				"\017<b14>"
 				"\020LWP"	/* Lightweight Profiling */
 				"\021FMA4"	/* 4-operand FMA instructions */
 				"\022TCE"	/* Translation Cache Extension */
 				"\023<b18>"
 				"\024NodeId"	/* NodeId MSR support */
 				"\025<b20>"
 				"\026TBM"	/* Trailing Bit Manipulation */
 				"\027Topology"	/* Topology Extensions */
 				"\030PCXC"	/* Core perf count */
 				"\031PNXC"	/* NB perf count */
 				"\032<b25>"
 				"\033DBE"	/* Data Breakpoint extension */
 				"\034PTSC"	/* Performance TSC */
 				"\035PL2I"	/* L2I perf count */
 				"\036MWAITX"	/* MONITORX/MWAITX instructions */
 				"\037<b30>"
 				"\040<b31>"
 				);
 			}
 
 			if (cpu_stdext_feature != 0) {
 				printf("\n  Structured Extended Features=0x%b",
 				    cpu_stdext_feature,
 				       "\020"
 				       /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
 				       "\001FSGSBASE"
 				       "\002TSCADJ"
 				       "\003SGX"
 				       /* Bit Manipulation Instructions */
 				       "\004BMI1"
 				       /* Hardware Lock Elision */
 				       "\005HLE"
 				       /* Advanced Vector Instructions 2 */
 				       "\006AVX2"
 				       /* FDP_EXCPTN_ONLY */
 				       "\007FDPEXC"
 				       /* Supervisor Mode Execution Prot. */
 				       "\010SMEP"
 				       /* Bit Manipulation Instructions */
 				       "\011BMI2"
 				       "\012ERMS"
 				       /* Invalidate Processor Context ID */
 				       "\013INVPCID"
 				       /* Restricted Transactional Memory */
 				       "\014RTM"
 				       "\015PQM"
 				       "\016NFPUSG"
 				       /* Intel Memory Protection Extensions */
 				       "\017MPX"
 				       "\020PQE"
 				       /* AVX512 Foundation */
 				       "\021AVX512F"
 				       "\022AVX512DQ"
 				       /* Enhanced NRBG */
 				       "\023RDSEED"
 				       /* ADCX + ADOX */
 				       "\024ADX"
 				       /* Supervisor Mode Access Prevention */
 				       "\025SMAP"
 				       "\026AVX512IFMA"
 				       /* Formerly PCOMMIT */
 				       "\027<b22>"
 				       "\030CLFLUSHOPT"
 				       "\031CLWB"
 				       "\032PROCTRACE"
 				       "\033AVX512PF"
 				       "\034AVX512ER"
 				       "\035AVX512CD"
 				       "\036SHA"
 				       "\037AVX512BW"
 				       "\040AVX512VL"
 				       );
 			}
 
 			if (cpu_stdext_feature2 != 0) {
 				printf("\n  Structured Extended Features2=0x%b",
 				    cpu_stdext_feature2,
 				       "\020"
 				       "\001PREFETCHWT1"
 				       "\002AVX512VBMI"
 				       "\003UMIP"
 				       "\004PKU"
 				       "\005OSPKE"
 				       "\006WAITPKG"
 				       "\007AVX512VBMI2"
 				       "\011GFNI"
 				       "\012VAES"
 				       "\013VPCLMULQDQ"
 				       "\014AVX512VNNI"
 				       "\015AVX512BITALG"
 				       "\016AVX512VPOPCNTDQ"
 				       "\027RDPID"
 				       "\032CLDEMOTE"
 				       "\034MOVDIRI"
 				       "\035MOVDIR64B"
 				       "\036ENQCMD"
 				       "\037SGXLC"
 				       );
 			}
 
 			if (cpu_stdext_feature3 != 0) {
 				printf("\n  Structured Extended Features3=0x%b",
 				    cpu_stdext_feature3,
 				       "\020"
 				       "\003AVX512_4VNNIW"
 				       "\004AVX512_4FMAPS"
 				       "\011AVX512VP2INTERSECT"
 				       "\013MD_CLEAR"
 				       "\016TSXFA"
 				       "\023PCONFIG"
 				       "\033IBPB"
 				       "\034STIBP"
 				       "\035L1DFL"
 				       "\036ARCH_CAP"
 				       "\037CORE_CAP"
 				       "\040SSBD"
 				       );
 			}
 
 			if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
 				cpuid_count(0xd, 0x1, regs);
 				if (regs[0] != 0) {
 					printf("\n  XSAVE Features=0x%b",
 					    regs[0],
 					    "\020"
 					    "\001XSAVEOPT"
 					    "\002XSAVEC"
 					    "\003XINUSE"
 					    "\004XSAVES");
 				}
 			}
 
 			if (cpu_ia32_arch_caps != 0) {
 				printf("\n  IA32_ARCH_CAPS=0x%b",
 				    (u_int)cpu_ia32_arch_caps,
 				       "\020"
 				       "\001RDCL_NO"
 				       "\002IBRS_ALL"
 				       "\003RSBA"
 				       "\004SKIP_L1DFL_VME"
 				       "\005SSB_NO"
 				       "\006MDS_NO"
 				       "\010TSX_CTRL"
 				       "\011TAA_NO"
 				       );
 			}
 
 			if (amd_extended_feature_extensions != 0) {
 				u_int amd_fe_masked;
 
 				amd_fe_masked = amd_extended_feature_extensions;
 				if ((amd_fe_masked & AMDFEID_IBRS) == 0)
 					amd_fe_masked &=
 					    ~(AMDFEID_IBRS_ALWAYSON |
 						AMDFEID_PREFER_IBRS);
 				if ((amd_fe_masked & AMDFEID_STIBP) == 0)
 					amd_fe_masked &=
 					    ~AMDFEID_STIBP_ALWAYSON;
 
 				printf("\n  "
 				    "AMD Extended Feature Extensions ID EBX="
 				    "0x%b", amd_fe_masked,
 				    "\020"
 				    "\001CLZERO"
 				    "\002IRPerf"
 				    "\003XSaveErPtr"
 				    "\005RDPRU"
 				    "\011MCOMMIT"
 				    "\012WBNOINVD"
 				    "\015IBPB"
 				    "\017IBRS"
 				    "\020STIBP"
 				    "\021IBRS_ALWAYSON"
 				    "\022STIBP_ALWAYSON"
 				    "\023PREFER_IBRS"
 				    "\031SSBD"
 				    "\032VIRT_SSBD"
 				    "\033SSB_NO"
 				    );
 			}
 
 			if (via_feature_rng != 0 || via_feature_xcrypt != 0)
 				print_via_padlock_info();
 
 			if (cpu_feature2 & CPUID2_VMX)
 				print_vmx_info();
 
 			if (amd_feature2 & AMDID2_SVM)
 				print_svm_info();
 
 			if ((cpu_feature & CPUID_HTT) &&
-			    cpu_vendor_id == CPU_VENDOR_AMD)
+			    (cpu_vendor_id == CPU_VENDOR_AMD ||
+			     cpu_vendor_id == CPU_VENDOR_HYGON))
 				cpu_feature &= ~CPUID_HTT;
 
 			/*
 			 * If this CPU supports P-state invariant TSC then
 			 * mention the capability.
 			 */
 			if (tsc_is_invariant) {
 				printf("\n  TSC: P-state invariant");
 				if (tsc_perf_stat)
 					printf(", performance statistics");
 			}
 		}
 #ifdef __i386__
 	} else if (cpu_vendor_id == CPU_VENDOR_CYRIX) {
 		printf("  DIR=0x%04x", cyrix_did);
 		printf("  Stepping=%u", (cyrix_did & 0xf000) >> 12);
 		printf("  Revision=%u", (cyrix_did & 0x0f00) >> 8);
 #ifndef CYRIX_CACHE_REALLY_WORKS
 		if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700)
 			printf("\n  CPU cache: write-through mode");
 #endif
 #endif
 	}
 
 	/* Avoid ugly blank lines: only print newline when we have to. */
 	if (*cpu_vendor || cpu_id)
 		printf("\n");
 
 	if (bootverbose) {
-		if (cpu_vendor_id == CPU_VENDOR_AMD)
+		if (cpu_vendor_id == CPU_VENDOR_AMD ||
+		    cpu_vendor_id == CPU_VENDOR_HYGON)
 			print_AMD_info();
 		else if (cpu_vendor_id == CPU_VENDOR_INTEL)
 			print_INTEL_info();
 #ifdef __i386__
 		else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA)
 			print_transmeta_info();
 #endif
 	}
 
 	print_hypervisor_info();
 }
 
 #ifdef __i386__
 /*
  * Panic when the detected cpu_class is one this kernel was not built to
  * support.  CPUCLASS_286 and CPUCLASS_386 always panic; CPUCLASS_486,
  * CPUCLASS_586 and CPUCLASS_686 panic only when the matching I486_CPU,
  * I586_CPU or I686_CPU option is not defined.  Any other class falls
  * through to the default case and returns normally.
  */
 void
 panicifcpuunsupported(void)
 {
 
 	/*
 	 * Outside of lint, building with none of the three CPU options
 	 * defined is rejected at compile time.
 	 */
 #if !defined(lint)
 #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
 #error This kernel is not configured for one of the supported CPUs
 #endif
 #else /* lint */
 #endif /* lint */
 	/*
 	 * Now that we have told the user what they have,
 	 * let them know if that machine type isn't configured.
 	 */
 	switch (cpu_class) {
 	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
 	case CPUCLASS_386:
 	/* Each class below joins the panic list only when not configured. */
 #if !defined(I486_CPU)
 	case CPUCLASS_486:
 #endif
 #if !defined(I586_CPU)
 	case CPUCLASS_586:
 #endif
 #if !defined(I686_CPU)
 	case CPUCLASS_686:
 #endif
 		panic("CPU class not configured");
 	default:
 		break;
 	}
 }
 
 /*
  * Written by the bluetrap6/bluetrap13 handlers with a handler-specific
  * magic value; identblue() zeroes it before executing rdmsr and inspects
  * it afterwards to learn which handler (if any) ran.
  */
 static	volatile u_int trap_by_rdmsr;
 
 /*
  * Special exception 6 handler.
  * The rdmsr instruction generates an invalid opcode fault on 486-class
  * Cyrix CPUs.  The stacked eip register points at the rdmsr instruction
  * in the function identblue() when this handler is called, so the
  * handler stores the magic value 0xa8c1d into trap_by_rdmsr, advances
  * the stacked eip by 2 (the length of rdmsr) and returns with iret.
  */
 inthand_t	bluetrap6;
 #ifdef __GNUCLIKE_ASM
 __asm
 ("									\n\
 	.text								\n\
 	.p2align 2,0x90							\n\
 	.type	" __XSTRING(CNAME(bluetrap6)) ",@function		\n\
 " __XSTRING(CNAME(bluetrap6)) ":					\n\
 	ss								\n\
 	movl	$0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) "		\n\
 	addl	$2, (%esp)	/* rdmsr is a 2-byte instruction */	\n\
 	iret								\n\
 ");
 #endif
 
 /*
  * Special exception 13 handler.
  * Accessing a non-existent MSR generates a general protection fault.
  * The handler stores the magic value 0xa89c4 into trap_by_rdmsr, pops
  * the error code pushed by the fault, advances the stacked eip by 2
  * (the length of rdmsr) and returns with iret.
  */
 inthand_t	bluetrap13;
 #ifdef __GNUCLIKE_ASM
 __asm
 ("									\n\
 	.text								\n\
 	.p2align 2,0x90							\n\
 	.type	" __XSTRING(CNAME(bluetrap13)) ",@function		\n\
 " __XSTRING(CNAME(bluetrap13)) ":					\n\
 	ss								\n\
 	movl	$0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) "		\n\
 	popl	%eax		/* discard error code */		\n\
 	addl	$2, (%esp)	/* rdmsr is a 2-byte instruction */	\n\
 	iret								\n\
 ");
 #endif
 
 /*
  * Tell an IBM Blue Lightning CPU apart from the Cyrix CPUs that do not
  * support the cpuid instruction, by executing rdmsr and observing which
  * fault handler (if any) ran.  Because temporary handlers are installed
  * in the IDT, this must be called after the interrupt descriptor table
  * register has been loaded.
  *
  * Returns IDENTBLUE_CYRIX486 when bluetrap6() ran, IDENTBLUE_CYRIXM2
  * when bluetrap13() ran, and IDENTBLUE_IBMCPU when neither left its
  * magic value behind.
  *
  * The original author notes this fault-based probe was the only method
  * they could find.
  */
 static int
 identblue(void)
 {
 
 	/* Start from a known state so a stale magic value is never seen. */
 	trap_by_rdmsr = 0;
 
 	/*
 	 * A Cyrix 486-class CPU has no rdmsr instruction: it raises an
 	 * invalid opcode fault, which bluetrap6() catches and records by
 	 * storing its magic number in trap_by_rdmsr.
 	 */
 	setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	/*
 	 * Some BIOSes disable the cpuid instruction on the Cyrix 6x86MX.
 	 * There rdmsr raises a general protection fault instead, which
 	 * bluetrap13() catches.
 	 */
 	setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	/* The probe itself: faults on Cyrix CPUs. */
 	rdmsr(0x1002);
 
 	if (trap_by_rdmsr == 0xa8c1d)
 		return (IDENTBLUE_CYRIX486);
 	if (trap_by_rdmsr == 0xa89c4)
 		return (IDENTBLUE_CYRIXM2);
 	return (IDENTBLUE_IBMCPU);
 }
 
 
 /*
  * identifycyrix() sets the lower 16 bits of cyrix_did as follows:
  *
  *  F E D C B A 9 8 7 6 5 4 3 2 1 0
  * +-------+-------+---------------+
  * |  SID  |  RID  |   Device ID   |
  * |    (DIR 1)    |    (DIR 0)    |
  * +-------+-------+---------------+
  *
  * The probe runs with interrupts disabled.  It flips one bit in CCR2 and
  * one bit in CCR3, reads each register back to see whether the value
  * changed, and restores the original contents.  When the CCR3 test
  * succeeds, DIR1 and DIR0 are read directly; otherwise a fixed value is
  * chosen from the CCR2 test result.
  */
 static void
 identifycyrix(void)
 {
 	register_t saveintr;
 	int	ccr2_test = 0, dir_test = 0;
 	u_char	ccr2, ccr3;
 
 	saveintr = intr_disable();
 
 	/*
 	 * Toggle CCR2_LOCK_NW and read CCR2 back.  The first read-back is
 	 * discarded (NOTE(review): presumably a dummy read; confirm the
 	 * reason against the Cyrix documentation).
 	 */
 	ccr2 = read_cyrix_reg(CCR2);
 	write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW);
 	read_cyrix_reg(CCR2);
 	if (read_cyrix_reg(CCR2) != ccr2)
 		ccr2_test = 1;
 	write_cyrix_reg(CCR2, ccr2);
 
 	/* Same probe on CCR3, toggling CCR3_MAPEN3. */
 	ccr3 = read_cyrix_reg(CCR3);
 	write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3);
 	read_cyrix_reg(CCR3);
 	if (read_cyrix_reg(CCR3) != ccr3)
 		dir_test = 1;					/* CPU supports DIRs. */
 	write_cyrix_reg(CCR3, ccr3);
 
 	if (dir_test) {
 		/* Device ID registers are available. */
 		cyrix_did = read_cyrix_reg(DIR1) << 8;
 		cyrix_did += read_cyrix_reg(DIR0);
 	} else if (ccr2_test)
 		cyrix_did = 0x0010;		/* 486S A-step */
 	else
 		cyrix_did = 0x00ff;		/* Old 486SLC/DLC and TI486SXLC/SXL */
 
 	intr_restore(saveintr);
 }
 #endif
 
 /*
  * cpufreq_post_change handler: adopt the frequency of the level that was
  * just selected as hw_clockrate.  Nothing is changed when the transition
  * reported an error (non-zero status).
  */
 static void
 tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status)
 {
 
 	if (status == 0) {
 		/* The level's total setting carries the new frequency in MHz. */
 		hw_clockrate = level->total_set.freq;
 	}
 }
 
 /*
  * Register tsc_freq_changed() as a cpufreq_post_change event handler and
  * remember the registration tag in tsc_post_tag.  No handler is
  * registered when tsc_is_invariant is set.
  */
 static void
 hook_tsc_freq(void *arg __unused)
 {
 
 	if (!tsc_is_invariant) {
 		tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
 		    tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY);
 	}
 }
 
 SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL);
 
 /*
  * BIOS vendor strings that indicate a virtual machine.  identify_hypervisor()
  * compares the "smbios.bios.vendor" environment value against vm_bname with
  * an exact strcmp() and, on a match, stores vm_guest into the global of
  * the same name.
  */
 static const struct {
 	const char *	vm_bname;
 	int		vm_guest;
 } vm_bnames[] = {
 	{ "QEMU",	VM_GUEST_VM },		/* QEMU */
 	{ "Plex86",	VM_GUEST_VM },		/* Plex86 */
 	{ "Bochs",	VM_GUEST_VM },		/* Bochs */
 	{ "Xen",	VM_GUEST_XEN },		/* Xen */
 	{ "BHYVE",	VM_GUEST_BHYVE },	/* bhyve */
 	{ "Seabios",	VM_GUEST_KVM },		/* KVM */
 };
 
 /*
  * System product strings that indicate a virtual machine.
  * identify_hypervisor() compares the "smbios.system.product" environment
  * value against vm_pname with an exact strcmp() and, on a match, stores
  * vm_guest into the global of the same name.
  */
 static const struct {
 	const char *	vm_pname;
 	int		vm_guest;
 } vm_pnames[] = {
 	{ "VMware Virtual Platform",	VM_GUEST_VMWARE },
 	{ "Virtual Machine",		VM_GUEST_VM }, /* Microsoft VirtualPC */
 	{ "VirtualBox",			VM_GUEST_VBOX },
 	{ "Parallels Virtual Platform",	VM_GUEST_PARALLELS },
 	{ "KVM",			VM_GUEST_KVM },
 };
 
 /*
  * Hypervisor vendor signatures as returned in the CPUID hypervisor leaves.
  * identify_hypervisor_cpuid_base() compares the first 12 bytes of the
  * returned signature against vm_cpuid and, on a match, stores vm_guest
  * into the global of the same name.
  *
  * The table is only ever read, so it is declared const like vm_bnames
  * and vm_pnames.
  */
 static const struct {
 	const char	*vm_cpuid;
 	int		vm_guest;
 } vm_cpuids[] = {
 	{ "XENXENXEN",		VM_GUEST_XEN },		/* XEN */
 	{ "Microsoft Hv",	VM_GUEST_HV },		/* Microsoft Hyper-V */
 	{ "VMwareVMware",	VM_GUEST_VMWARE },	/* VMware VM */
 	{ "KVMKVMKVM",		VM_GUEST_KVM },		/* KVM */
 	{ "bhyve bhyve ",	VM_GUEST_BHYVE },	/* bhyve */
 	{ "VBoxVBoxVBox",	VM_GUEST_VBOX },	/* VirtualBox */
 };
 
 static void
 identify_hypervisor_cpuid_base(void)
 {
 	u_int leaf, regs[4];
 	int i;
 
 	/*
 	 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
 	 * http://lkml.org/lkml/2008/10/1/246
 	 *
 	 * KB1009458: Mechanisms to determine if software is running in
 	 * a VMware virtual machine
 	 * http://kb.vmware.com/kb/1009458
 	 *
 	 * Search for a hypervisor that we recognize. If we cannot find
 	 * a specific hypervisor, return the first information about the
 	 * hypervisor that we found, as others may be able to use.
 	 */
 	for (leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
 		do_cpuid(leaf, regs);
 
 		/*
 		 * KVM from Linux kernels prior to commit
 		 * 57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 set %eax
 		 * to 0 rather than a valid hv_high value.  Check for
 		 * the KVM signature bytes and fixup %eax to the
 		 * highest supported leaf in that case.
 		 */
 		if (regs[0] == 0 && regs[1] == 0x4b4d564b &&
 		    regs[2] == 0x564b4d56 && regs[3] == 0x0000004d)
 			regs[0] = leaf + 1;
 			
 		if (regs[0] >= leaf) {
 			for (i = 0; i < nitems(vm_cpuids); i++)
 				if (strncmp((const char *)&regs[1],
 				    vm_cpuids[i].vm_cpuid, 12) == 0) {
 					vm_guest = vm_cpuids[i].vm_guest;
 					break;
 				}
 
 			/*
 			 * If this is the first entry or we found a
 			 * specific hypervisor, record the base, high value,
 			 * and vendor identifier.
 			 */
 			if (vm_guest != VM_GUEST_VM || leaf == 0x40000000) {
 				hv_base = leaf;
 				hv_high = regs[0];
 				((u_int *)&hv_vendor)[0] = regs[1];
 				((u_int *)&hv_vendor)[1] = regs[2];
 				((u_int *)&hv_vendor)[2] = regs[3];
 				hv_vendor[12] = '\0';
 
 				/*
 				 * If we found a specific hypervisor, then
 				 * we are finished.
 				 */
 				if (vm_guest != VM_GUEST_VM)
 					return;
 			}
 		}
 	}
 }
 
 /*
  * Determine whether we run under a hypervisor and, if so, which one,
  * storing the answer in vm_guest.  Sources are consulted in this order:
  * the CPUID hypervisor leaves (when CPUID2_HV is set), the SMBIOS system
  * serial number (VMware), the SMBIOS BIOS vendor, and finally the SMBIOS
  * system product name.
  */
 void
 identify_hypervisor(void)
 {
 	u_int hvregs[4];
 	char *env;
 	int idx;
 
 	/* CPUID2_HV set means we are running in a hypervisor environment. */
 	if ((cpu_feature2 & CPUID2_HV) != 0) {
 		vm_guest = VM_GUEST_VM;
 		identify_hypervisor_cpuid_base();
 
 		/* A vendor string from CPUID is definitive. */
 		if (hv_vendor[0] != '\0')
 			return;
 	}
 
 	/* Older hypervisors: look at the SMBIOS strings. */
 	env = kern_getenv("smbios.system.serial");
 	if (env != NULL) {
 		if (strncmp(env, "VMware-", 7) == 0 ||
 		    strncmp(env, "VMW", 3) == 0) {
 			vmware_hvcall(VMW_HVCMD_GETVERSION, hvregs);
 			if (hvregs[1] == VMW_HVMAGIC) {
 				vm_guest = VM_GUEST_VMWARE;
 				freeenv(env);
 				return;
 			}
 		}
 		freeenv(env);
 	}
 
 	/*
 	 * XXX: Some of the entries below may be unnecessary, as they were
 	 * added to FreeBSD before the checks above existed.
 	 */
 	env = kern_getenv("smbios.bios.vendor");
 	if (env != NULL) {
 		for (idx = 0; idx < nitems(vm_bnames); idx++) {
 			if (strcmp(env, vm_bnames[idx].vm_bname) != 0)
 				continue;
 			vm_guest = vm_bnames[idx].vm_guest;
 			/* A specific match is final. */
 			if (vm_guest != VM_GUEST_VM) {
 				freeenv(env);
 				return;
 			}
 			/*
 			 * Generic match: stop scanning the BIOS vendors,
 			 * but let the product names refine the answer.
 			 */
 			break;
 		}
 		freeenv(env);
 	}
 
 	env = kern_getenv("smbios.system.product");
 	if (env != NULL) {
 		for (idx = 0; idx < nitems(vm_pnames); idx++) {
 			if (strcmp(env, vm_pnames[idx].vm_pname) != 0)
 				continue;
 			vm_guest = vm_pnames[idx].vm_guest;
 			break;
 		}
 		freeenv(env);
 	}
 }
 
 bool
 fix_cpuid(void)
 {
 	uint64_t msr;
 
 	/*
 	 * Clear "Limit CPUID Maxval" bit and return true if the caller should
 	 * get the largest standard CPUID function number again if it is set
 	 * from BIOS.  It is necessary for probing correct CPU topology later
 	 * and for the correct operation of the AVX-aware userspace.
 	 */
 	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
 	    ((CPUID_TO_FAMILY(cpu_id) == 0xf &&
 	    CPUID_TO_MODEL(cpu_id) >= 0x3) ||
 	    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    CPUID_TO_MODEL(cpu_id) >= 0xe))) {
 		msr = rdmsr(MSR_IA32_MISC_ENABLE);
 		if ((msr & IA32_MISC_EN_LIMCPUID) != 0) {
 			msr &= ~IA32_MISC_EN_LIMCPUID;
 			wrmsr(MSR_IA32_MISC_ENABLE, msr);
 			return (true);
 		}
 	}
 
 	/*
 	 * Re-enable AMD Topology Extension that could be disabled by BIOS
 	 * on some notebook processors.  Without the extension it's really
 	 * hard to determine the correct CPU cache topology.
 	 * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h
 	 * Models 60h-6Fh Processors, Publication # 50742.
 	 */
 	if (vm_guest == VM_GUEST_NO && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x15) {
 		msr = rdmsr(MSR_EXTFEATURES);
 		if ((msr & ((uint64_t)1 << 54)) == 0) {
 			msr |= (uint64_t)1 << 54;
 			wrmsr(MSR_EXTFEATURES, msr);
 			return (true);
 		}
 	}
 	return (false);
 }
 
 /*
  * First stage of CPU identification: run CPUID leaves 0 and 1 and store
  * the results in cpu_high, cpu_vendor, cpu_id, cpu_procinfo, cpu_feature
  * and cpu_feature2.
  */
 void
 identify_cpu1(void)
 {
 	u_int leaf[4];
 	u_int *vendor_words;
 
 	/* Leaf 0: highest standard leaf and the 12-byte vendor string. */
 	do_cpuid(0, leaf);
 	cpu_high = leaf[0];
 	/* The vendor string is assembled from words 1, 3, 2 in that order. */
 	vendor_words = (u_int *)&cpu_vendor;
 	vendor_words[0] = leaf[1];
 	vendor_words[1] = leaf[3];
 	vendor_words[2] = leaf[2];
 	cpu_vendor[12] = '\0';
 
 	/* Leaf 1: signature, processor info and the two feature words. */
 	do_cpuid(1, leaf);
 	cpu_id = leaf[0];
 	cpu_procinfo = leaf[1];
 	cpu_feature = leaf[3];
 	cpu_feature2 = leaf[2];
 }
 
 /*
  * Second stage of CPU identification: read CPUID leaf 6 into the
  * cpu_power_* globals and leaf 7 (sub-leaf 0) into the cpu_stdext_feature*
  * globals, each only when cpu_high says the leaf exists.  When leaf 7
  * advertises CPUID_STDEXT3_ARCH_CAP, MSR_IA32_ARCH_CAP is read into
  * cpu_ia32_arch_caps.
  */
 void
 identify_cpu2(void)
 {
 	u_int leaf[4], cpu_stdext_disable;
 
 	if (cpu_high >= 6) {
 		cpuid_count(6, 0, leaf);
 		cpu_power_eax = leaf[0];
 		cpu_power_ebx = leaf[1];
 		cpu_power_ecx = leaf[2];
 		cpu_power_edx = leaf[3];
 	}
 
 	if (cpu_high < 7)
 		return;
 
 	cpuid_count(7, 0, leaf);
 	cpu_stdext_feature = leaf[1];
 
 	/*
 	 * Some hypervisors fail to filter out extended features they do
 	 * not support.  The hw.cpu_stdext_disable tunable lets the user
 	 * mask off such bits, i.e. extensions that need a CR4 bit which
 	 * the VM monitor does not handle.
 	 */
 	cpu_stdext_disable = 0;
 	TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable);
 	cpu_stdext_feature &= ~cpu_stdext_disable;
 
 	cpu_stdext_feature2 = leaf[2];
 	cpu_stdext_feature3 = leaf[3];
 
 	if ((cpu_stdext_feature3 & CPUID_STDEXT3_ARCH_CAP) != 0)
 		cpu_ia32_arch_caps = rdmsr(MSR_IA32_ARCH_CAP);
 }
 
 /*
  * Resolve cpu_vendor_id from the vendor string and apply fix_cpuid().
  * When fix_cpuid() reports that it changed something, CPUID leaf 0 is
  * run again to refresh cpu_high.
  */
 void
 identify_cpu_fixup_bsp(void)
 {
 	u_int leaf0[4];
 
 	cpu_vendor_id = find_cpu_vendor_id();
 
 	if (!fix_cpuid())
 		return;
 
 	do_cpuid(0, leaf0);
 	cpu_high = leaf0[0];
 }
 
 /*
  * Final stage of CPU identification.
  *
  * Runs identify_cpu_fixup_bsp() and identify_cpu2(), reads the
  * MONITOR/MWAIT leaf (5) when available, then queries the extended
  * CPUID leaves 0x80000000, 0x80000001, 0x80000007 and 0x80000008 for the
  * vendors listed below and stores the results in the amd_* and cpu_*
  * globals.  On i386 it additionally sorts out Cyrix and IBM Blue
  * Lightning parts, setting `cpu' (and, for IBM, cpu_vendor and
  * cpu_vendor_id) accordingly.
  */
 void
 finishidentcpu(void)
 {
 	u_int regs[4];
 #ifdef __i386__
 	u_char ccr3;
 #endif
 
 	identify_cpu_fixup_bsp();
 
 	/* Leaf 5 is only read when CPUID2_MON is advertised. */
 	if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) {
 		do_cpuid(5, regs);
 		cpu_mon_mwait_flags = regs[2];
 		cpu_mon_min_size = regs[0] &  CPUID5_MON_MIN_SIZE;
 		cpu_mon_max_size = regs[1] &  CPUID5_MON_MAX_SIZE;
 	}
 
 	identify_cpu2();
 
 	/*
 	 * Fetch the highest extended leaf.  The i386 variant also requires
 	 * cpu_high > 0 and only accepts a result >= 0x80000000; the other
 	 * variant stores whatever leaf 0x80000000 returns.
 	 */
 #ifdef __i386__
 	if (cpu_high > 0 &&
 	    (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	     cpu_vendor_id == CPU_VENDOR_AMD ||
+	     cpu_vendor_id == CPU_VENDOR_HYGON ||
 	     cpu_vendor_id == CPU_VENDOR_TRANSMETA ||
 	     cpu_vendor_id == CPU_VENDOR_CENTAUR ||
 	     cpu_vendor_id == CPU_VENDOR_NSC)) {
 		do_cpuid(0x80000000, regs);
 		if (regs[0] >= 0x80000000)
 			cpu_exthigh = regs[0];
 	}
 #else
 	if (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	    cpu_vendor_id == CPU_VENDOR_AMD ||
+	    cpu_vendor_id == CPU_VENDOR_HYGON ||
 	    cpu_vendor_id == CPU_VENDOR_CENTAUR) {
 		do_cpuid(0x80000000, regs);
 		cpu_exthigh = regs[0];
 	}
 #endif
 	if (cpu_exthigh >= 0x80000001) {
 		do_cpuid(0x80000001, regs);
 		/*
 		 * Drop from the extended feature word those bits (within
 		 * mask 0x0183f3ff) that are already set in cpu_feature.
 		 */
 		amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff);
 		amd_feature2 = regs[2];
 	}
 	if (cpu_exthigh >= 0x80000007) {
 		do_cpuid(0x80000007, regs);
 		amd_rascap = regs[1];
 		amd_pminfo = regs[3];
 	}
 	if (cpu_exthigh >= 0x80000008) {
 		do_cpuid(0x80000008, regs);
 		cpu_maxphyaddr = regs[0] & 0xff;
 		amd_extended_feature_extensions = regs[1];
 		cpu_procinfo2 = regs[2];
 	} else {
 		/* Without leaf 0x80000008: 36 bits with PAE, 32 without. */
 		cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32;
 	}
 
 #ifdef __i386__
 	if (cpu_vendor_id == CPU_VENDOR_CYRIX) {
 		if (cpu == CPU_486) {
 			/*
 			 * These conditions are equivalent to:
 			 *     - CPU does not support cpuid instruction.
 			 *     - Cyrix/IBM CPU is detected.
 			 */
 			if (identblue() == IDENTBLUE_IBMCPU) {
 				strcpy(cpu_vendor, "IBM");
 				cpu_vendor_id = CPU_VENDOR_IBM;
 				cpu = CPU_BLUE;
 				return;
 			}
 		}
 		switch (cpu_id & 0xf00) {
 		case 0x600:
 			/*
 			 * Cyrix's datasheet does not describe DIRs.
 			 * Therefore, I assume it does not have them
 			 * and use the result of the cpuid instruction.
 			 * XXX they seem to have it for now at least. -Peter
 			 */
 			identifycyrix();
 			cpu = CPU_M2;
 			break;
 		default:
 			identifycyrix();
 			/*
 			 * This routine contains a trick.
 			 * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now.
 			 */
 			switch (cyrix_did & 0x00f0) {
 			case 0x00:
 			case 0xf0:
 				cpu = CPU_486DLC;
 				break;
 			case 0x10:
 				cpu = CPU_CY486DX;
 				break;
 			case 0x20:
 				if ((cyrix_did & 0x000f) < 8)
 					cpu = CPU_M1;
 				else
 					cpu = CPU_M1SC;
 				break;
 			case 0x30:
 				cpu = CPU_M1;
 				break;
 			case 0x40:
 				/* MediaGX CPU */
 				cpu = CPU_M1SC;
 				break;
 			default:
 				/* M2 and later CPUs are treated as M2. */
 				cpu = CPU_M2;
 
 				/*
 				 * Enable the cpuid instruction: save CCR3,
 				 * write CCR3_MAPEN0 to it, set CCR4_CPUID
 				 * in CCR4, then restore CCR3.
 				 */
 				ccr3 = read_cyrix_reg(CCR3);
 				write_cyrix_reg(CCR3, CCR3_MAPEN0);
 				write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID);
 				write_cyrix_reg(CCR3, ccr3);
 
 				/* cpuid now works; refresh what it reports. */
 				do_cpuid(0, regs);
 				cpu_high = regs[0];	/* eax */
 				do_cpuid(1, regs);
 				cpu_id = regs[0];	/* eax */
 				cpu_feature = regs[3];	/* edx */
 				break;
 			}
 		}
 	} else if (cpu == CPU_486 && *cpu_vendor == '\0') {
 		/*
 		 * There are BlueLightning CPUs that do not change
 		 * undefined flags by dividing 5 by 2.  In this case,
 		 * the CPU identification routine in locore.s leaves
 		 * cpu_vendor as a null string and puts CPU_486 into
 		 * cpu.
 		 */
 		if (identblue() == IDENTBLUE_IBMCPU) {
 			strcpy(cpu_vendor, "IBM");
 			cpu_vendor_id = CPU_VENDOR_IBM;
 			cpu = CPU_BLUE;
 			return;
 		}
 	}
 #endif
 }
 
 /*
  * Return the default PTI setting (NOTE(review): presumably page table
  * isolation; confirm against the caller): 0 when the CPU vendor string
  * is AMD's or (with this change) Hygon's, or when IA32_ARCH_CAPS has
  * RDCL_NO set; 1 otherwise.
  */
 int
 pti_get_default(void)
 {
 
-	if (strcmp(cpu_vendor, AMD_VENDOR_ID) == 0)
+	if (strcmp(cpu_vendor, AMD_VENDOR_ID) == 0 ||
+	    strcmp(cpu_vendor, HYGON_VENDOR_ID) == 0)
 		return (0);
 	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) != 0)
 		return (0);
 	return (1);
 }
 
 /*
  * Look up the cpu_vendor string in the cpu_vendors table and return the
  * vendor_id of the first exact match, or 0 when no entry matches.
  */
 static u_int
 find_cpu_vendor_id(void)
 {
 	int	idx;
 
 	idx = 0;
 	while (idx < nitems(cpu_vendors)) {
 		if (strcmp(cpu_vendor, cpu_vendors[idx].vendor) == 0)
 			return (cpu_vendors[idx].vendor_id);
 		idx++;
 	}
 
 	return (0);
 }
 
 /*
  * Print the associativity suffix for an L1 cache/TLB line: the value 255
  * is reported as fully associative, anything else as "<i>-way".
  */
 static void
 print_AMD_assoc(int i)
 {
 	if (i != 255) {
 		printf(", %d-way associative\n", i);
 		return;
 	}
 	printf(", fully associative\n");
 }
 
 /*
  * Print the associativity suffix for an L2 cache/TLB line.  Only the low
  * four bits of `i' are used; they hold AMD's encoded associativity field
  * from CPUID Fn8000_0006, which is not a plain way count.  Encodings
  * without a case below are reported as "reserved configuration".
  */
 static void
 print_AMD_l2_assoc(int i)
 {
 	switch (i & 0x0f) {
 	case 0x0: printf(", disabled/not present\n"); break;
 	case 0x1: printf(", direct mapped\n"); break;
 	case 0x2: printf(", 2-way associative\n"); break;
 	case 0x3: printf(", 3-way associative\n"); break;
 	case 0x4: printf(", 4-way associative\n"); break;
 	case 0x5: printf(", 6-way associative\n"); break;
 	case 0x6: printf(", 8-way associative\n"); break;
 	case 0x8: printf(", 16-way associative\n"); break;
 	case 0xa: printf(", 32-way associative\n"); break;
 	case 0xb: printf(", 48-way associative\n"); break;
 	case 0xc: printf(", 64-way associative\n"); break;
 	case 0xd: printf(", 96-way associative\n"); break;
 	case 0xe: printf(", 128-way associative\n"); break;
 	case 0xf: printf(", fully associative\n"); break;
 	default: printf(", reserved configuration\n"); break;
 	}
 }
 
 /*
  * Print verbose cache and TLB details for AMD (and compatible) CPUs:
  * L1 information from CPUID leaf 0x80000005, L2 information from leaf
  * 0x80000006, the K6 write-allocate settings on i386, and a warning for
  * the family 0xf models 0x20-0x3f that have a known SMP erratum.
  */
 static void
 print_AMD_info(void)
 {
 #ifdef __i386__
 	uint64_t amd_whcr;
 #endif
 	u_int regs[4];
 
 	if (cpu_exthigh >= 0x80000005) {
 		do_cpuid(0x80000005, regs);
 		printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff);
 		print_AMD_assoc(regs[0] >> 24);
 
 		printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff);
 		print_AMD_assoc((regs[0] >> 8) & 0xff);
 
 		printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff);
 		print_AMD_assoc(regs[1] >> 24);
 
 		printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff);
 		print_AMD_assoc((regs[1] >> 8) & 0xff);
 
 		printf("L1 data cache: %d kbytes", regs[2] >> 24);
 		printf(", %d bytes/line", regs[2] & 0xff);
 		printf(", %d lines/tag", (regs[2] >> 8) & 0xff);
 		print_AMD_assoc((regs[2] >> 16) & 0xff);
 
 		printf("L1 instruction cache: %d kbytes", regs[3] >> 24);
 		printf(", %d bytes/line", regs[3] & 0xff);
 		printf(", %d lines/tag", (regs[3] >> 8) & 0xff);
 		print_AMD_assoc((regs[3] >> 16) & 0xff);
 	}
 
 	if (cpu_exthigh >= 0x80000006) {
 		do_cpuid(0x80000006, regs);
 		/*
 		 * regs[0] (2MB pages) and regs[1] (4KB pages) share one
 		 * layout per the AMD CPUID specification:
 		 *   bits 31:28  data TLB associativity
 		 *   bits 27:16  data TLB entries
 		 *   bits 15:12  instruction TLB associativity
 		 *   bits 11:0   instruction TLB entries
 		 * When the upper half is zero the TLB is unified and is
 		 * described by the lower half alone.
 		 */
 		if ((regs[0] >> 16) != 0) {
 			printf("L2 2MB data TLB: %d entries",
 			    (regs[0] >> 16) & 0xfff);
 			print_AMD_l2_assoc(regs[0] >> 28);
 			printf("L2 2MB instruction TLB: %d entries",
 			    regs[0] & 0xfff);
 			print_AMD_l2_assoc((regs[0] >> 12) & 0xf);
 		} else {
 			printf("L2 2MB unified TLB: %d entries",
 			    regs[0] & 0xfff);
 			print_AMD_l2_assoc((regs[0] >> 12) & 0xf);
 		}
 		if ((regs[1] >> 16) != 0) {
 			printf("L2 4KB data TLB: %d entries",
 			    (regs[1] >> 16) & 0xfff);
 			print_AMD_l2_assoc(regs[1] >> 28);
 
 			printf("L2 4KB instruction TLB: %d entries",
 			    regs[1] & 0xfff);
 			print_AMD_l2_assoc((regs[1] >> 12) & 0xf);
 		} else {
 			printf("L2 4KB unified TLB: %d entries",
 			    regs[1] & 0xfff);
 			print_AMD_l2_assoc((regs[1] >> 12) & 0xf);
 		}
 		printf("L2 unified cache: %d kbytes", regs[2] >> 16);
 		printf(", %d bytes/line", regs[2] & 0xff);
 		printf(", %d lines/tag", (regs[2] >> 8) & 0x0f);
 		print_AMD_l2_assoc((regs[2] >> 12) & 0x0f);
 	}
 
 #ifdef __i386__
 	if (((cpu_id & 0xf00) == 0x500)
 	    && (((cpu_id & 0x0f0) > 0x80)
 		|| (((cpu_id & 0x0f0) == 0x80)
 		    && (cpu_id & 0x00f) > 0x07))) {
 		/* K6-2(new core [Stepping 8-F]), K6-III or later */
 		amd_whcr = rdmsr(0xc0000082);
 		/*
 		 * Build the 10-bit field mask in 64 bits: 0x3ff << 22 does
 		 * not fit in a signed int and would sign-extend into the
 		 * upper half of the MSR value.
 		 */
 		if (!(amd_whcr & ((uint64_t)0x3ff << 22))) {
 			printf("Write Allocate Disable\n");
 		} else {
 			printf("Write Allocate Enable Limit: %dM bytes\n",
 			    (u_int32_t)((amd_whcr &
 			    ((uint64_t)0x3ff << 22)) >> 22) * 4);
 			printf("Write Allocate 15-16M bytes: %s\n",
 			    (amd_whcr & (1 << 16)) ? "Enable" : "Disable");
 		}
 	} else if (((cpu_id & 0xf00) == 0x500)
 		   && ((cpu_id & 0x0f0) > 0x50)) {
 		/* K6, K6-2(old core) */
 		amd_whcr = rdmsr(0xc0000082);
 		if (!(amd_whcr & (0x7f << 1))) {
 			printf("Write Allocate Disable\n");
 		} else {
 			printf("Write Allocate Enable Limit: %dM bytes\n",
 			    (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4);
 			printf("Write Allocate 15-16M bytes: %s\n",
 			    (amd_whcr & 0x0001) ? "Enable" : "Disable");
 			printf("Hardware Write Allocate Control: %s\n",
 			    (amd_whcr & 0x0100) ? "Enable" : "Disable");
 		}
 	}
 #endif
 	/*
 	 * Opteron Rev E has a bug: on very rare occasions a read memory
 	 * barrier is not performed as expected if it is followed by a
 	 * non-atomic read-modify-write instruction.
 	 * Because the bug shows up very rarely (intensive machine usage
 	 * on other operating systems generally generates one unexplainable
 	 * crash every 2 months) and a model specific fix would be
 	 * impractical at this stage, only print a warning when the broken
 	 * model and family are identified.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 &&
 	    CPUID_TO_MODEL(cpu_id) <= 0x3f)
 		printf("WARNING: This architecture revision has known SMP "
 		    "hardware bugs which may cause random instability\n");
 }
 
 /*
  * Print Intel cache and TLB information.
  *
  * When CPUID leaf 2 is available, each descriptor byte it returns is
  * passed to print_INTEL_TLB().  When extended leaf 0x80000006 is
  * available, the L2 cache size, associativity and line size decoded
  * from regs[2] are printed as well.
  */
 static void
 print_INTEL_info(void)
 {
 	u_int regs[4];
 	u_int rounds, regnum;
 	u_int nwaycode, nway;
 
 	if (cpu_high >= 2) {
 		rounds = 0;
 		do {
 			do_cpuid(0x2, regs);
 			/*
 			 * On the first pass the low byte of regs[0] supplies
 			 * the number of iterations to perform.
 			 */
 			if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0)
 				break;	/* we have a buggy CPU */
 
 			for (regnum = 0; regnum <= 3; ++regnum) {
 				/*
 				 * Registers with bit 31 set are skipped.  The
 				 * constant must be unsigned: 1 << 31 does not
 				 * fit in a 32-bit int.
 				 */
 				if (regs[regnum] & (1U << 31))
 					continue;
 				/*
 				 * The low byte of regs[0] is the iteration
 				 * count read above, not a descriptor.
 				 */
 				if (regnum != 0)
 					print_INTEL_TLB(regs[regnum] & 0xff);
 				print_INTEL_TLB((regs[regnum] >> 8) & 0xff);
 				print_INTEL_TLB((regs[regnum] >> 16) & 0xff);
 				print_INTEL_TLB((regs[regnum] >> 24) & 0xff);
 			}
 		} while (--rounds > 0);
 	}
 
 	if (cpu_exthigh >= 0x80000006) {
 		do_cpuid(0x80000006, regs);
 		/*
 		 * Associativity codes 0x02..0x08 are mapped to a way count
 		 * of 1 << (code / 2); any other code is reported as 0-way.
 		 */
 		nwaycode = (regs[2] >> 12) & 0x0f;
 		if (nwaycode >= 0x02 && nwaycode <= 0x08)
 			nway = 1 << (nwaycode / 2);
 		else
 			nway = 0;
 		printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n",
 		    (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff);
 	}
 }
 
 /*
  * Print a one-line description of a single CPUID leaf 2 descriptor byte.
  *
  * data is the descriptor value (callers pass one byte, 0x00-0xff).
  * Descriptors 0x00 and 0x40, and any value not listed below, print
  * nothing.  Every message ends with a newline so that consecutive
  * descriptors do not run together on one console line.
  */
 static void
 print_INTEL_TLB(u_int data)
 {
 	switch (data) {
 	case 0x0:
 	case 0x40:
 	default:
 		break;
 	case 0x1:
 		printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n");
 		break;
 	case 0x2:
 		printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n");
 		break;
 	case 0x3:
 		printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n");
 		break;
 	case 0x4:
 		printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n");
 		break;
 	case 0x6:
 		printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x8:
 		printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x9:
 		printf("1st-level instruction cache: 32 KB, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xa:
 		printf("1st-level data cache: 8 KB, 2-way set associative, 32 byte line size\n");
 		break;
 	case 0xb:
 		printf("Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries\n");
 		break;
 	case 0xc:
 		printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0xd:
 		/* Previously lacked the trailing newline. */
 		printf("1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xe:
 		printf("1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size\n");
 		break;
 	case 0x1d:
 		printf("2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size\n");
 		break;
 	case 0x21:
 		printf("2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x22:
 		printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x23:
 		printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x24:
 		printf("2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x25:
 		printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x29:
 		printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x2c:
 		printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x30:
 		printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x39: /* De-listed in SDM rev. 54 */
 		printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x3b: /* De-listed in SDM rev. 54 */
 		printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x3c: /* De-listed in SDM rev. 54 */
 		printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x41:
 		printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x42:
 		printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x43:
 		printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x44:
 		printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x45:
 		printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x46:
 		printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0x47:
 		printf("3rd-level cache: 8 MB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x48:
 		printf("2nd-level cache: 3MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0x49:
 		/*
 		 * The same descriptor is reported as a 3rd-level cache on
 		 * family 0xf model 0x6 and as a 2nd-level cache elsewhere.
 		 */
 		if (CPUID_TO_FAMILY(cpu_id) == 0xf &&
 		    CPUID_TO_MODEL(cpu_id) == 0x6)
 			printf("3rd-level cache: 4MB, 16-way set associative, 64-byte line size\n");
 		else
 			/* Previously lacked the trailing newline. */
 			printf("2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x4a:
 		printf("3rd-level cache: 6MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0x4b:
 		printf("3rd-level cache: 8MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x4c:
 		printf("3rd-level cache: 12MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0x4d:
 		printf("3rd-level cache: 16MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x4e:
 		printf("2nd-level cache: 6MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0x4f:
 		printf("Instruction TLB: 4 KByte pages, 32 entries\n");
 		break;
 	case 0x50:
 		printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n");
 		break;
 	case 0x51:
 		printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n");
 		break;
 	case 0x52:
 		printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n");
 		break;
 	case 0x55:
 		printf("Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries\n");
 		break;
 	case 0x56:
 		printf("Data TLB0: 4 MByte pages, 4-way set associative, 16 entries\n");
 		break;
 	case 0x57:
 		printf("Data TLB0: 4 KByte pages, 4-way associative, 16 entries\n");
 		break;
 	case 0x59:
 		printf("Data TLB0: 4 KByte pages, fully associative, 16 entries\n");
 		break;
 	case 0x5a:
 		printf("Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries\n");
 		break;
 	case 0x5b:
 		printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n");
 		break;
 	case 0x5c:
 		printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n");
 		break;
 	case 0x5d:
 		printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n");
 		break;
 	case 0x60:
 		printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x61:
 		printf("Instruction TLB: 4 KByte pages, fully associative, 48 entries\n");
 		break;
 	case 0x63:
 		printf("Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and a separate array with 1 GByte pages, 4-way set associative, 4 entries\n");
 		break;
 	case 0x64:
 		printf("Data TLB: 4 KBytes pages, 4-way set associative, 512 entries\n");
 		break;
 	case 0x66:
 		printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x67:
 		printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x68:
 		printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x6a:
 		printf("uTLB: 4KByte pages, 8-way set associative, 64 entries\n");
 		break;
 	case 0x6b:
 		printf("DTLB: 4KByte pages, 8-way set associative, 256 entries\n");
 		break;
 	case 0x6c:
 		printf("DTLB: 2M/4M pages, 8-way set associative, 128 entries\n");
 		break;
 	case 0x6d:
 		printf("DTLB: 1 GByte pages, fully associative, 16 entries\n");
 		break;
 	case 0x70:
 		printf("Trace cache: 12K-uops, 8-way set associative\n");
 		break;
 	case 0x71:
 		printf("Trace cache: 16K-uops, 8-way set associative\n");
 		break;
 	case 0x72:
 		printf("Trace cache: 32K-uops, 8-way set associative\n");
 		break;
 	case 0x76:
 		printf("Instruction TLB: 2M/4M pages, fully associative, 8 entries\n");
 		break;
 	case 0x78:
 		printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n");
 		break;
 	case 0x79:
 		printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7a:
 		printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7b:
 		printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7c:
 		printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7d:
 		printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n");
 		break;
 	case 0x7f:
 		printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n");
 		break;
 	case 0x80:
 		printf("2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size\n");
 		break;
 	case 0x82:
 		printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x83:
 		printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x84:
 		printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x85:
 		printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x86:
 		printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0x87:
 		printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xa0:
 		printf("DTLB: 4k pages, fully associative, 32 entries\n");
 		break;
 	case 0xb0:
 		printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n");
 		break;
 	case 0xb1:
 		printf("Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries\n");
 		break;
 	case 0xb2:
 		printf("Instruction TLB: 4KByte pages, 4-way set associative, 64 entries\n");
 		break;
 	case 0xb3:
 		printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n");
 		break;
 	case 0xb4:
 		printf("Data TLB1: 4 KByte pages, 4-way associative, 256 entries\n");
 		break;
 	case 0xb5:
 		printf("Instruction TLB: 4KByte pages, 8-way set associative, 64 entries\n");
 		break;
 	case 0xb6:
 		printf("Instruction TLB: 4KByte pages, 8-way set associative, 128 entries\n");
 		break;
 	case 0xba:
 		printf("Data TLB1: 4 KByte pages, 4-way associative, 64 entries\n");
 		break;
 	case 0xc0:
 		printf("Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries\n");
 		break;
 	case 0xc1:
 		printf("Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries\n");
 		break;
 	case 0xc2:
 		printf("DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries\n");
 		break;
 	case 0xc3:
 		printf("Shared 2nd-Level TLB: 4 KByte /2 MByte pages, 6-way associative, 1536 entries. Also 1GBbyte pages, 4-way, 16 entries\n");
 		break;
 	case 0xc4:
 		printf("DTLB: 2M/4M Byte pages, 4-way associative, 32 entries\n");
 		break;
 	case 0xca:
 		printf("Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries\n");
 		break;
 	case 0xd0:
 		printf("3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xd1:
 		printf("3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xd2:
 		printf("3rd-level cache: 2 MByte, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xd6:
 		printf("3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xd7:
 		printf("3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xd8:
 		printf("3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xdc:
 		printf("3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0xdd:
 		printf("3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0xde:
 		printf("3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0xe2:
 		printf("3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0xe3:
 		printf("3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0xe4:
 		printf("3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0xea:
 		printf("3rd-level cache: 12MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0xeb:
 		printf("3rd-level cache: 18MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0xec:
 		printf("3rd-level cache: 24MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0xf0:
 		printf("64-Byte prefetching\n");
 		break;
 	case 0xf1:
 		printf("128-Byte prefetching\n");
 		break;
 	}
 }
 
 /*
  * Print AMD SVM capability information taken from CPUID leaf 0x8000000A.
  *
  * A "(disabled in BIOS)" marker is emitted when the VM_CR_SVMDIS bit is
  * set in MSR_VM_CR.  Without bootverbose only a short comma-separated
  * list of selected features plus the ASID count (regs[1]) is printed;
  * with bootverbose the whole feature word is decoded with %b, followed
  * by the revision (low byte of regs[0]) and the ASID count.
  */
 static void
 print_svm_info(void)
 {
 	u_int regs[4], svm_feat;
 	uint64_t vm_cr;
 	const char *sep;
 
 	printf("\n  SVM: ");
 	do_cpuid(0x8000000A, regs);
 	svm_feat = regs[3];
 
 	vm_cr = rdmsr(MSR_VM_CR);
 	if ((vm_cr & VM_CR_SVMDIS) == VM_CR_SVMDIS)
 		printf("(disabled in BIOS) ");
 
 	if (bootverbose) {
 		/* Verbose: decode every bit of the feature word. */
 		printf("Features=0x%b", svm_feat,
 		    "\020"
 		    "\001NP"			/* Nested paging */
 		    "\002LbrVirt"		/* LBR virtualization */
 		    "\003SVML"			/* SVM lock */
 		    "\004NRIPS"			/* NRIP save */
 		    "\005TscRateMsr"		/* MSR based TSC rate control */
 		    "\006VmcbClean"		/* VMCB clean bits */
 		    "\007FlushByAsid"		/* Flush by ASID */
 		    "\010DecodeAssist"		/* Decode assist */
 		    "\011<b8>"
 		    "\012<b9>"
 		    "\013PauseFilter"		/* PAUSE intercept filter */
 		    "\014EncryptedMcodePatch"
 		    "\015PauseFilterThreshold"	/* PAUSE filter threshold */
 		    "\016AVIC"			/* virtual interrupt controller */
 		    "\017<b14>"
 		    "\020V_VMSAVE_VMLOAD"
 		    "\021vGIF"
 		    "\022GMET"			/* Guest Mode Execute Trap */
 		    "\023<b18>"
 		    "\024<b19>"
 		    "\025<b20>"
 		    "\026<b21>"
 		    "\027<b22>"
 		    "\030<b23>"
 		    "\031<b24>"
 		    "\032<b25>"
 		    "\033<b26>"
 		    "\034<b27>"
 		    "\035<b28>"
 		    "\036<b29>"
 		    "\037<b30>"
 		    "\040<b31>");
 		printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]);
 		return;
 	}
 
 	/*
 	 * Terse: sep starts empty and becomes "," once the first item has
 	 * been printed, so items are comma-separated without a leading comma.
 	 */
 	sep = "";
 	if (svm_feat & (1 << 0)) {
 		printf("%sNP", sep);
 		sep = ",";
 	}
 	if (svm_feat & (1 << 3)) {
 		printf("%sNRIP", sep);
 		sep = ",";
 	}
 	if (svm_feat & (1 << 5)) {
 		printf("%sVClean", sep);
 		sep = ",";
 	}
 	if (svm_feat & (1 << 6)) {
 		printf("%sAFlush", sep);
 		sep = ",";
 	}
 	if (svm_feat & (1 << 7)) {
 		printf("%sDAssist", sep);
 		sep = ",";
 	}
 	printf("%sNAsids=%d", sep, regs[1]);
 }
 
 #ifdef __i386__
 static void
 print_transmeta_info(void)
 {
 	u_int regs[4], nreg = 0;
 
 	do_cpuid(0x80860000, regs);
 	nreg = regs[0];
 	if (nreg >= 0x80860001) {
 		do_cpuid(0x80860001, regs);
 		printf("  Processor revision %u.%u.%u.%u\n",
 		       (regs[1] >> 24) & 0xff,
 		       (regs[1] >> 16) & 0xff,
 		       (regs[1] >> 8) & 0xff,
 		       regs[1] & 0xff);
 	}
 	if (nreg >= 0x80860002) {
 		do_cpuid(0x80860002, regs);
 		printf("  Code Morphing Software revision %u.%u.%u-%u-%u\n",
 		       (regs[1] >> 24) & 0xff,
 		       (regs[1] >> 16) & 0xff,
 		       (regs[1] >> 8) & 0xff,
 		       regs[1] & 0xff,
 		       regs[2]);
 	}
 	if (nreg >= 0x80860006) {
 		char info[65];
 		do_cpuid(0x80860003, (u_int*) &info[0]);
 		do_cpuid(0x80860004, (u_int*) &info[16]);
 		do_cpuid(0x80860005, (u_int*) &info[32]);
 		do_cpuid(0x80860006, (u_int*) &info[48]);
 		info[64] = 0;
 		printf("  %s\n", info);
 	}
 }
 #endif
 
 /*
  * Print the VIA Padlock feature word (regs[3] of CPUID leaf 0xc0000001),
  * decoded with the %b bit-name string below.
  */
 static void
 print_via_padlock_info(void)
 {
 	u_int cpuid_out[4];
 
 	do_cpuid(0xc0000001, cpuid_out);
 	printf("\n  VIA Padlock Features=0x%b", cpuid_out[3],
 	    "\020"
 	    "\003RNG"			/* RNG */
 	    "\007AES"			/* ACE */
 	    "\011AES-CTR"		/* ACE2 */
 	    "\013SHA1,SHA256"		/* PHE */
 	    "\015RSA");			/* PMM */
 }
 
 /*
  * Return the upper 32 bits of a VMX capability MSR, i.e. the controls
  * that can be set to 1.
  *
  * basic is the value of MSR_VMX_BASIC; when its bit 55 is set the
  * true_msr variant is read, otherwise msr is read.
  */
 static uint32_t
 vmx_settable(uint64_t basic, int msr, int true_msr)
 {
 	int source;
 
 	source = (basic & (1ULL << 55)) ? true_msr : msr;
 
 	/* Just report the controls that can be set to 1. */
 	return (rdmsr(source) >> 32);
 }
 
 /*
  * Print Intel VT-x (VMX) capability information.
  *
  * A "(disabled in BIOS)" marker is emitted when
  * IA32_FEATURE_CONTROL_VMX_EN is clear in MSR_IA32_FEATURE_CONTROL.
  * The settable bits of the pin-based, processor-based, exit and entry
  * control MSRs are gathered with vmx_settable().  Without bootverbose a
  * short comma-separated summary of selected features is printed; with
  * bootverbose each control word is decoded with %b, followed by the
  * EPT/VPID capabilities when either feature can be enabled.
  */
 static void
 print_vmx_info(void)
 {
 	uint64_t basic, msr;
 	uint32_t entry, exit, mask, pin, proc, proc2;
 	int comma;
 
 	printf("\n  VT-x: ");
 	msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
 	if (!(msr & IA32_FEATURE_CONTROL_VMX_EN))
 		printf("(disabled in BIOS) ");
 	basic = rdmsr(MSR_VMX_BASIC);
 	pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS,
 	    MSR_VMX_TRUE_PINBASED_CTLS);
 	proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS,
 	    MSR_VMX_TRUE_PROCBASED_CTLS);
 	/*
 	 * The secondary controls are only read when the primary controls
 	 * allow enabling them; the same MSR is passed for both variants.
 	 */
 	if (proc & PROCBASED_SECONDARY_CONTROLS)
 		proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2,
 		    MSR_VMX_PROCBASED_CTLS2);
 	else
 		proc2 = 0;
 	exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS);
 	entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS);
 
 	if (!bootverbose) {
 		/* comma becomes 1 once the first item has been printed. */
 		comma = 0;
 		if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT &&
 		    entry & VM_ENTRY_LOAD_PAT) {
 			printf("%sPAT", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_HLT_EXITING) {
 			printf("%sHLT", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_MTF) {
 			printf("%sMTF", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_PAUSE_EXITING) {
 			printf("%sPAUSE", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc2 & PROCBASED2_ENABLE_EPT) {
 			printf("%sEPT", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) {
 			printf("%sUG", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc2 & PROCBASED2_ENABLE_VPID) {
 			printf("%sVPID", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_USE_TPR_SHADOW &&
 		    proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES &&
 		    proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE &&
 		    proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION &&
 		    proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) {
 			printf("%sVID", comma ? "," : "");
 			comma = 1;
 			if (pin & PINBASED_POSTED_INTERRUPT)
 				printf(",PostIntr");
 		}
 		return;
 	}
 
 	/* The %b bit numbers below are relative to the upper half of basic. */
 	mask = basic >> 32;
 	printf("Basic Features=0x%b", mask,
 	"\020"
 	"\02132PA"		/* 32-bit physical addresses */
 	"\022SMM"		/* SMM dual-monitor */
 	"\027INS/OUTS"		/* VM-exit info for INS and OUTS */
 	"\030TRUE"		/* TRUE_CTLS MSRs */
 	);
 	printf("\n        Pin-Based Controls=0x%b", pin,
 	"\020"
 	"\001ExtINT"		/* External-interrupt exiting */
 	"\004NMI"		/* NMI exiting */
 	"\006VNMI"		/* Virtual NMIs */
 	"\007PreTmr"		/* Activate VMX-preemption timer */
 	"\010PostIntr"		/* Process posted interrupts */
 	);
 	printf("\n        Primary Processor Controls=0x%b", proc,
 	"\020"
 	"\003INTWIN"		/* Interrupt-window exiting */
 	"\004TSCOff"		/* Use TSC offsetting */
 	"\010HLT"		/* HLT exiting */
 	"\012INVLPG"		/* INVLPG exiting */
 	"\013MWAIT"		/* MWAIT exiting */
 	"\014RDPMC"		/* RDPMC exiting */
 	"\015RDTSC"		/* RDTSC exiting */
 	"\020CR3-LD"		/* CR3-load exiting */
 	"\021CR3-ST"		/* CR3-store exiting */
 	"\024CR8-LD"		/* CR8-load exiting */
 	"\025CR8-ST"		/* CR8-store exiting */
 	"\026TPR"		/* Use TPR shadow */
 	"\027NMIWIN"		/* NMI-window exiting */
 	"\030MOV-DR"		/* MOV-DR exiting */
 	"\031IO"		/* Unconditional I/O exiting */
 	"\032IOmap"		/* Use I/O bitmaps */
 	"\034MTF"		/* Monitor trap flag */
 	"\035MSRmap"		/* Use MSR bitmaps */
 	"\036MONITOR"		/* MONITOR exiting */
 	"\037PAUSE"		/* PAUSE exiting */
 	);
 	if (proc & PROCBASED_SECONDARY_CONTROLS)
 		printf("\n        Secondary Processor Controls=0x%b", proc2,
 		"\020"
 		"\001APIC"		/* Virtualize APIC accesses */
 		"\002EPT"		/* Enable EPT */
 		"\003DT"		/* Descriptor-table exiting */
 		"\004RDTSCP"		/* Enable RDTSCP */
 		"\005x2APIC"		/* Virtualize x2APIC mode */
 		"\006VPID"		/* Enable VPID */
 		"\007WBINVD"		/* WBINVD exiting */
 		"\010UG"		/* Unrestricted guest */
 		"\011APIC-reg"		/* APIC-register virtualization */
 		"\012VID"		/* Virtual-interrupt delivery */
 		"\013PAUSE-loop"	/* PAUSE-loop exiting */
 		"\014RDRAND"		/* RDRAND exiting */
 		"\015INVPCID"		/* Enable INVPCID */
 		"\016VMFUNC"		/* Enable VM functions */
 		"\017VMCS"		/* VMCS shadowing */
 		"\020EPT#VE"		/* EPT-violation #VE */
 		"\021XSAVES"		/* Enable XSAVES/XRSTORS */
 		);
 	/*
 	 * Decode the settable VM-exit controls.  This must be "exit": "mask"
 	 * still holds the upper half of MSR_VMX_BASIC at this point.
 	 */
 	printf("\n        Exit Controls=0x%b", exit,
 	"\020"
 	"\003DR"		/* Save debug controls */
 				/* Ignore Host address-space size */
 	"\015PERF"		/* Load MSR_PERF_GLOBAL_CTRL */
 	"\020AckInt"		/* Acknowledge interrupt on exit */
 	"\023PAT-SV"		/* Save MSR_PAT */
 	"\024PAT-LD"		/* Load MSR_PAT */
 	"\025EFER-SV"		/* Save MSR_EFER */
 	"\026EFER-LD"		/* Load MSR_EFER */
 	"\027PTMR-SV"		/* Save VMX-preemption timer value */
 	);
 	/* Likewise, decode the settable VM-entry controls from "entry". */
 	printf("\n        Entry Controls=0x%b", entry,
 	"\020"
 	"\003DR"		/* Load debug controls */
 				/* Ignore IA-32e mode guest */
 				/* Ignore Entry to SMM */
 				/* Ignore Deactivate dual-monitor treatment */
 	"\016PERF"		/* Load MSR_PERF_GLOBAL_CTRL */
 	"\017PAT"		/* Load MSR_PAT */
 	"\020EFER"		/* Load MSR_EFER */
 	);
 	if (proc & PROCBASED_SECONDARY_CONTROLS &&
 	    (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) {
 		msr = rdmsr(MSR_VMX_EPT_VPID_CAP);
 		/* Low half of the MSR: EPT capabilities. */
 		mask = msr;
 		printf("\n        EPT Features=0x%b", mask,
 		"\020"
 		"\001XO"		/* Execute-only translations */
 		"\007PW4"		/* Page-walk length of 4 */
 		"\011UC"		/* EPT paging-structure mem can be UC */
 		"\017WB"		/* EPT paging-structure mem can be WB */
 		"\0212M"		/* EPT PDE can map a 2-Mbyte page */
 		"\0221G"		/* EPT PDPTE can map a 1-Gbyte page */
 		"\025INVEPT"		/* INVEPT is supported */
 		"\026AD"		/* Accessed and dirty flags for EPT */
 		"\032single"		/* INVEPT single-context type */
 		"\033all"		/* INVEPT all-context type */
 		);
 		/* High half of the MSR: VPID capabilities. */
 		mask = msr >> 32;
 		printf("\n        VPID Features=0x%b", mask,
 		"\020"
 		"\001INVVPID"		/* INVVPID is supported */
 		"\011individual"	/* INVVPID individual-address type */
 		"\012single"		/* INVVPID single-context type */
 		"\013all"		/* INVVPID all-context type */
 		 /* INVVPID single-context-retaining-globals type */
 		"\014single-globals"
 		);
 	}
 }
 
 /*
  * Print the hypervisor vendor string, if hv_vendor is non-empty.
  */
 static void
 print_hypervisor_info(void)
 {
 
 	if (hv_vendor[0] == '\0')
 		return;
 	printf("Hypervisor: Origin = \"%s\"\n", hv_vendor);
 }
 
 /*
  * Report the highest physical address usable on the running system.
  *
  * The result is the all-ones mask that is cpu_maxphyaddr bits wide; on
  * i386 without PAE it is capped at 0xffffffff.
  */
 vm_paddr_t
 cpu_getmaxphyaddr(void)
 {
 	unsigned long long limit;
 
 #if defined(__i386__)
 	if (pae_mode == 0)
 		return (0xffffffff);
 #endif
 	limit = 1ULL << cpu_maxphyaddr;
 	return (limit - 1);
 }
Index: head/sys/x86/x86/local_apic.c
===================================================================
--- head/sys/x86/x86/local_apic.c	(revision 356939)
+++ head/sys/x86/x86/local_apic.c	(revision 356940)
@@ -1,2175 +1,2176 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Local APIC support on Pentium and later processors.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_atpic.h"
 #include "opt_hwpmc_hooks.h"
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/timeet.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <x86/apicreg.h>
 #include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <x86/init.h>
 
 #ifdef DDB
 #include <sys/interrupt.h>
 #include <ddb/ddb.h>
 #endif
 
 #ifdef __amd64__
 #define	SDT_APIC	SDT_SYSIGT
 #define	GSEL_APIC	0
 #else
 #define	SDT_APIC	SDT_SYS386IGT
 #define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
 #endif
 
 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");
 
 /* Sanity checks on IDT vectors. */
 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
 CTASSERT(APIC_LOCAL_INTS == 240);
 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
 
 /*
  * I/O interrupts use non-negative IRQ values.  These values are used
  * to mark unused IDT entries or IDT entries reserved for a non-I/O
  * interrupt.
  */
 #define	IRQ_FREE	-1
 #define	IRQ_TIMER	-2
 #define	IRQ_SYSCALL	-3
 #define	IRQ_DTRACE_RET	-4
 #define	IRQ_EVTCHN	-5
 
 enum lat_timer_mode {
 	LAT_MODE_UNDEF =	0,
 	LAT_MODE_PERIODIC =	1,
 	LAT_MODE_ONESHOT =	2,
 	LAT_MODE_DEADLINE =	3,
 };
 
 /*
  * Support for local APICs.  Local APICs manage interrupts on each
  * individual processor as opposed to I/O APICs which receive interrupts
  * from I/O devices and then forward them on to the local APICs.
  *
  * Local APICs can also send interrupts to each other thus providing the
  * mechanism for IPIs.
  */
 
 /*
  * Software description of one local vector table (LVT) entry.  The
  * positional initializers of lvts[] and elvts[] fill these fields in
  * declaration order.
  */
 struct lvt {
 	u_int lvt_edgetrigger:1;	/* presumably 1 = edge, 0 = level -- confirm */
 	u_int lvt_activehi:1;		/* presumably 1 = active-high polarity -- confirm */
 	u_int lvt_masked:1;		/* 1 in the defaults described as "masked" */
 	u_int lvt_active:1;
 	u_int lvt_mode:16;		/* initialized from APIC_LVT_DM_* values */
 	u_int lvt_vector:8;		/* initialized from APIC_*_INT vectors or 0 */
 };
 
 /*
  * State kept for one local APIC.  The trailing declarator also defines
  * the file-static pointer "lapics"; presumably it addresses an array
  * indexed by APIC ID -- confirm against the allocation site.
  */
 struct lapic {
 	struct lvt la_lvts[APIC_LVT_MAX + 1];	/* one entry per LVT pin */
 	struct lvt la_elvts[APIC_ELVT_MAX + 1];	/* one entry per extended LVT */
 	u_int la_id:8;
 	u_int la_cluster:4;
 	u_int la_cluster_id:2;
 	u_int la_present:1;
 	u_long *la_timer_count;
 	uint64_t la_timer_period;
 	enum lat_timer_mode la_timer_mode;
 	uint32_t lvt_timer_base;
 	uint32_t lvt_timer_last;
 	/* Include IDT_SYSCALL to make indexing easier. */
 	int la_ioint_irqs[APIC_NUM_IOINTS + 1];	/* IRQ per I/O vector; see IRQ_* markers */
 } static *lapics;
 
 /* Global defaults for local APIC LVT entries. */
 static struct lvt lvts[APIC_LVT_MAX + 1] = {
 	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
 	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
 	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
 	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
 };
 
 /* Global defaults for AMD local APIC ELVT entries. */
 static struct lvt elvts[APIC_ELVT_MAX + 1] = {
 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
 };
 
 /*
  * Interrupt entry points for I/O interrupts, one per group of 32 IDT
  * vectors as noted on each entry.  The first group (vectors 0 - 31) has
  * no handler.
  */
 static inthand_t *ioint_handlers[] = {
 	NULL,			/* 0 - 31 */
 	IDTVEC(apic_isr1),	/* 32 - 63 */
 	IDTVEC(apic_isr2),	/* 64 - 95 */
 	IDTVEC(apic_isr3),	/* 96 - 127 */
 	IDTVEC(apic_isr4),	/* 128 - 159 */
 	IDTVEC(apic_isr5),	/* 160 - 191 */
 	IDTVEC(apic_isr6),	/* 192 - 223 */
 	IDTVEC(apic_isr7),	/* 224 - 255 */
 };
 
 /*
  * The "_pti" counterparts of the ioint_handlers[] entry points, laid out
  * the same way: one per group of 32 IDT vectors.  Presumably used when
  * page-table isolation is enabled -- confirm at the point of use.
  */
 static inthand_t *ioint_pti_handlers[] = {
 	NULL,			/* 0 - 31 */
 	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
 	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
 	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
 	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
 	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
 	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
 	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
 };
 
 /*
  * APIC_TDCR_* encodings listed in increasing order from 1 through 128.
  * Presumably indexed by log2 of the desired timer divisor -- confirm
  * against lapic_timer_set_divisor().
  */
 static u_int32_t lapic_timer_divisors[] = {
 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
 	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
 };
 
 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);
 
 volatile char *lapic_map;
 vm_paddr_t lapic_paddr;
 int x2apic_mode;
 int lapic_eoi_suppression;
 static int lapic_timer_tsc_deadline;
 static u_long lapic_timer_divisor, count_freq;
 static struct eventtimer lapic_et;
 #ifdef SMP
 static uint64_t lapic_ipi_wait_mult;
 #endif
 unsigned int max_apic_id;
 
 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
     &lapic_eoi_suppression, 0, "");
 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
     &lapic_timer_tsc_deadline, 0, "");
 
 static void lapic_calibrate_initcount(struct lapic *la);
 static void lapic_calibrate_deadline(struct lapic *la);
 
 /*
  * Read a 32-bit local APIC register: via the MSR at MSR_APIC_000 + reg
  * in x2APIC mode, otherwise from the memory-mapped window at
  * lapic_map + reg * LAPIC_MEM_MUL.
  */
 static uint32_t
 lapic_read32(enum LAPIC_REGISTERS reg)
 {
 
 	if (x2apic_mode)
 		return (rdmsr32(MSR_APIC_000 + reg));
 	return (*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL));
 }
 
 /*
  * Write a 32-bit local APIC register.  In x2APIC mode the MSR write is
  * preceded by mfence() and lfence(); otherwise the value is stored to
  * the memory-mapped window.
  */
 static void
 lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
 {
 
 	if (!x2apic_mode) {
 		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
 		return;
 	}
 	mfence();
 	lfence();
 	wrmsr(MSR_APIC_000 + reg, val);
 }
 
 /*
  * Same as lapic_write32(), except that no fences are issued before the
  * MSR write in x2APIC mode.
  */
 static void
 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
 {
 	volatile uint32_t *mmio;
 
 	if (x2apic_mode) {
 		wrmsr(MSR_APIC_000 + reg, val);
 		return;
 	}
 	mmio = (volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
 	*mmio = val;
 }
 
 #ifdef SMP
 /*
  * Return the 64-bit interrupt command register.  In x2APIC mode it is a
  * single MSR read; otherwise the high half is read first, then the low
  * half, and the two are combined.
  */
 static uint64_t
 lapic_read_icr(void)
 {
 	uint32_t hi, lo;
 
 	if (x2apic_mode)
 		return (rdmsr(MSR_APIC_000 + LAPIC_ICR_LO));
 
 	hi = lapic_read32(LAPIC_ICR_HI);
 	lo = lapic_read32(LAPIC_ICR_LO);
 	return (((uint64_t)hi << 32) | lo);
 }
 
 /*
  * Return the low 32 bits of the interrupt command register, widened to
  * 64 bits.
  */
 static uint64_t
 lapic_read_icr_lo(void)
 {
 	uint32_t lo;
 
 	lo = lapic_read32(LAPIC_ICR_LO);
 	return (lo);
 }
 
 /*
  * Write the interrupt command register.  In x2APIC mode both halves go
  * out in one MSR write preceded by mfence(); otherwise the high half is
  * written first and the low half second.
  */
 static void
 lapic_write_icr(uint32_t vhi, uint32_t vlo)
 {
 	uint64_t icr;
 
 	if (!x2apic_mode) {
 		lapic_write32(LAPIC_ICR_HI, vhi);
 		lapic_write32(LAPIC_ICR_LO, vlo);
 		return;
 	}
 	icr = ((uint64_t)vhi << 32) | vlo;
 	mfence();
 	wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, icr);
 }
 #endif /* SMP */
 
 /* Set the x2APIC and enable bits in the APIC base MSR of this CPU. */
 static void
 native_lapic_enable_x2apic(void)
 {
 
 	wrmsr(MSR_APICBASE,
 	    rdmsr(MSR_APICBASE) | APICBASE_X2APIC | APICBASE_ENABLED);
 }
 
 /*
  * Report whether the APIC base MSR has both the x2APIC and the enable
  * bits set.
  */
 static bool
 native_lapic_is_x2apic(void)
 {
 	const uint64_t mode_bits = APICBASE_X2APIC | APICBASE_ENABLED;
 
 	return ((rdmsr(MSR_APICBASE) & mode_bits) == mode_bits);
 }
 
 /* Prototypes for file-local helpers that are used before their definition. */
 static void	lapic_enable(void);
 static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
 static void	lapic_timer_oneshot(struct lapic *);
 static void	lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
 static void	lapic_timer_periodic(struct lapic *);
 static void	lapic_timer_deadline(struct lapic *);
 static void	lapic_timer_stop(struct lapic *);
 static void	lapic_timer_set_divisor(u_int divisor);
 static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
 static int	lapic_et_start(struct eventtimer *et,
 		    sbintime_t first, sbintime_t period);
 static int	lapic_et_stop(struct eventtimer *et);
 static u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
 static void	lapic_set_tpr(u_int vector);
 
 /* PIC descriptor for the local APIC; only the resume hook is provided. */
 struct pic lapic_pic = { .pic_resume = lapic_resume };
 
 /*
  * Forward declarations of the native implementations that are installed
  * in the apic_ops method table.  The IPI senders exist only on SMP
  * kernels.
  */
 static void	native_lapic_create(u_int apic_id, int boot_cpu);
 static void	native_lapic_init(vm_paddr_t addr);
 static void	native_lapic_xapic_mode(void);
 static void	native_lapic_setup(int boot);
 static void	native_lapic_dump(const char *str);
 static void	native_lapic_disable(void);
 static void	native_lapic_eoi(void);
 static int	native_lapic_id(void);
 static int	native_lapic_intr_pending(u_int vector);
 static u_int	native_apic_cpuid(u_int apic_id);
 static u_int	native_apic_alloc_vector(u_int apic_id, u_int irq);
 static u_int	native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
 		    u_int count, u_int align);
 static void 	native_apic_disable_vector(u_int apic_id, u_int vector);
 static void 	native_apic_enable_vector(u_int apic_id, u_int vector);
 static void 	native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
 static void 	native_lapic_set_logical_id(u_int apic_id, u_int cluster,
 		    u_int cluster_id);
 static int 	native_lapic_enable_pmc(void);
 static void 	native_lapic_disable_pmc(void);
 static void 	native_lapic_reenable_pmc(void);
 static void 	native_lapic_enable_cmc(void);
 static int 	native_lapic_enable_mca_elvt(void);
 static int 	native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
 		    u_char masked);
 static int 	native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
 		    uint32_t mode);
 static int 	native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
 		    enum intr_polarity pol);
 static int 	native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
 		    enum intr_trigger trigger);
 #ifdef SMP
 static void 	native_lapic_ipi_raw(register_t icrlo, u_int dest);
 static void 	native_lapic_ipi_vectored(u_int vector, int dest);
 static int 	native_lapic_ipi_wait(int delay);
 #endif /* SMP */
 static int	native_lapic_ipi_alloc(inthand_t *ipifunc);
 static void	native_lapic_ipi_free(int vector);
 
 /*
  * Method table binding each apic_ops entry point to its native
  * implementation in this file.  The ipi_raw, ipi_vectored and ipi_wait
  * slots are filled in only when the kernel is built with SMP.
  */
 struct apic_ops apic_ops = {
 	.create			= native_lapic_create,
 	.init			= native_lapic_init,
 	.xapic_mode		= native_lapic_xapic_mode,
 	.is_x2apic		= native_lapic_is_x2apic,
 	.setup			= native_lapic_setup,
 	.dump			= native_lapic_dump,
 	.disable		= native_lapic_disable,
 	.eoi			= native_lapic_eoi,
 	.id			= native_lapic_id,
 	.intr_pending		= native_lapic_intr_pending,
 	.set_logical_id		= native_lapic_set_logical_id,
 	.cpuid			= native_apic_cpuid,
 	.alloc_vector		= native_apic_alloc_vector,
 	.alloc_vectors		= native_apic_alloc_vectors,
 	.enable_vector		= native_apic_enable_vector,
 	.disable_vector		= native_apic_disable_vector,
 	.free_vector		= native_apic_free_vector,
 	.enable_pmc		= native_lapic_enable_pmc,
 	.disable_pmc		= native_lapic_disable_pmc,
 	.reenable_pmc		= native_lapic_reenable_pmc,
 	.enable_cmc		= native_lapic_enable_cmc,
 	.enable_mca_elvt	= native_lapic_enable_mca_elvt,
 #ifdef SMP
 	.ipi_raw		= native_lapic_ipi_raw,
 	.ipi_vectored		= native_lapic_ipi_vectored,
 	.ipi_wait		= native_lapic_ipi_wait,
 #endif
 	.ipi_alloc		= native_lapic_ipi_alloc,
 	.ipi_free		= native_lapic_ipi_free,
 	.set_lvt_mask		= native_lapic_set_lvt_mask,
 	.set_lvt_mode		= native_lapic_set_lvt_mode,
 	.set_lvt_polarity	= native_lapic_set_lvt_polarity,
 	.set_lvt_triggermode	= native_lapic_set_lvt_triggermode,
 };
 
 /*
  * Compute the value to program into an LVT (or extended LVT) register.
  *
  * 'value' is the current register contents; the mask, trigger mode,
  * polarity, delivery mode and vector fields are cleared and rebuilt from
  * the software description in 'lvt'.  'la' and 'pin' are used only for
  * the diagnostic message.  Panics on a delivery mode other than
  * NMI, SMI, INIT, ExtINT or fixed.
  */
 static uint32_t
 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
 {
 
 	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
 	    APIC_LVT_VECTOR);
 	if (lvt->lvt_edgetrigger == 0)
 		value |= APIC_LVT_TM;
 	if (lvt->lvt_activehi == 0)
 		value |= APIC_LVT_IIPP_INTALO;
 	if (lvt->lvt_masked)
 		value |= APIC_LVT_M;
 	value |= lvt->lvt_mode;
 	switch (lvt->lvt_mode) {
 	case APIC_LVT_DM_NMI:
 	case APIC_LVT_DM_SMI:
 	case APIC_LVT_DM_INIT:
 	case APIC_LVT_DM_EXTINT:
 		/*
 		 * These delivery modes are always programmed as edge
 		 * triggered.  The override must not depend on bootverbose;
 		 * only the message does.
 		 */
 		if (!lvt->lvt_edgetrigger) {
 			if (bootverbose)
 				printf(
 				    "lapic%u: Forcing LINT%u to edge trigger\n",
 				    la->la_id, pin);
 			value &= ~APIC_LVT_TM;
 		}
 		/* Use a vector of 0. */
 		break;
 	case APIC_LVT_DM_FIXED:
 		value |= lvt->lvt_vector;
 		break;
 	default:
 		panic("bad APIC LVT delivery mode: %#x\n", value);
 	}
 	return (value);
 }
 
 /*
  * Build the register value for LVT entry 'pin' of 'la'.  The per-APIC
  * entry is used when it has been marked active; otherwise the global
  * default from lvts[] applies.
  */
 static uint32_t
 lvt_mode(struct lapic *la, u_int pin, uint32_t value)
 {
 	struct lvt *entry;
 
 	KASSERT(pin <= APIC_LVT_MAX,
 	    ("%s: pin %u out of range", __func__, pin));
 	entry = la->la_lvts[pin].lvt_active ? &la->la_lvts[pin] : &lvts[pin];
 	return (lvt_mode_impl(la, entry, pin, value));
 }
 
 /*
  * Build the register value for extended LVT entry 'idx' of 'la'.  The
  * entry is asserted to be active, edge triggered and active high.
  */
 static uint32_t
 elvt_mode(struct lapic *la, u_int idx, uint32_t value)
 {
 	struct lvt *ext;
 
 	KASSERT(idx <= APIC_ELVT_MAX,
 	    ("%s: idx %u out of range", __func__, idx));
 	ext = &la->la_elvts[idx];
 
 	KASSERT(ext->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
 	KASSERT(ext->lvt_edgetrigger,
 	    ("%s: ELVT%u is not edge triggered", __func__, idx));
 	KASSERT(ext->lvt_activehi,
 	    ("%s: ELVT%u is not active high", __func__, idx));
 
 	return (lvt_mode_impl(la, ext, idx, value));
 }
 
 /*
  * Map the local APIC and setup necessary interrupt vectors.
  *
  * 'addr' is the page-aligned physical address of the register page.
  * In order, this maps the page, switches to x2APIC mode if requested,
  * installs the spurious/timer/error/CMCI IDT entries, enables the local
  * APIC, records the APIC ID of the calling CPU, registers the "LAPIC"
  * event timer (unless the "clock" hint for apic0 is 0), decides on EOI
  * suppression and, on SMP kernels in xAPIC mode, calibrates the IPI
  * wait loop.
  */
 static void
 native_lapic_init(vm_paddr_t addr)
 {
 #ifdef SMP
 	uint64_t r, r1, r2, rx;
 #endif
 	uint32_t ver;
 	int i;
 	bool arat;
 
 	/*
 	 * Enable x2APIC mode if possible. Map the local APIC
 	 * registers page.
 	 *
 	 * Keep the LAPIC registers page mapped uncached for x2APIC
 	 * mode too, to have direct map page attribute set to
 	 * uncached.  This is needed to work around CPU errata present
 	 * on all Intel processors.
 	 */
 	KASSERT(trunc_page(addr) == addr,
 	    ("local APIC not aligned on a page boundary"));
 	lapic_paddr = addr;
 	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
 	if (x2apic_mode) {
 		native_lapic_enable_x2apic();
 		/* Registers are reached via MSRs from here on. */
 		lapic_map = NULL;
 	}
 
 	/* Setup the spurious interrupt handler. */
 	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
 	    GSEL_APIC);
 
 	/* Perform basic initialization of the BSP's local APIC. */
 	lapic_enable();
 
 	/* Set BSP's per-CPU local APIC ID. */
 	PCPU_SET(apic_id, lapic_id());
 
 	/* Local APIC timer interrupt. */
 	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
 	    SDT_APIC, SEL_KPL, GSEL_APIC);
 
 	/* Local APIC error interrupt. */
 	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
 	    SDT_APIC, SEL_KPL, GSEL_APIC);
 
 	/* XXX: Thermal interrupt */
 
 	/* Local APIC CMCI. */
 	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
 	    SDT_APIC, SEL_KPL, GSEL_APIC);
 
 	/* Register the event timer unless the hint exists and is zero. */
 	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
 		/* Set if APIC timer runs in C3. */
 		arat = (cpu_power_eax & CPUTPM1_ARAT);
 
 		bzero(&lapic_et, sizeof(lapic_et));
 		lapic_et.et_name = "LAPIC";
 		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
 		    ET_FLAGS_PERCPU;
 		lapic_et.et_quality = 600;
 		if (!arat) {
 			/* Timer stops in C3: flag it and lower its quality. */
 			lapic_et.et_flags |= ET_FLAGS_C3STOP;
 			lapic_et.et_quality = 100;
 		}
 		/*
 		 * Default to TSC-deadline mode when the CPU advertises it
 		 * and the TSC is invariant with a known frequency; the
 		 * tunable may override that default.
 		 */
 		if ((cpu_feature & CPUID_TSC) != 0 &&
 		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
 		    tsc_is_invariant && tsc_freq != 0) {
 			lapic_timer_tsc_deadline = 1;
 			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
 			    &lapic_timer_tsc_deadline);
 		}
 
 		lapic_et.et_frequency = 0;
 		/* We don't know frequency yet, so trying to guess. */
 		lapic_et.et_min_period = 0x00001000LL;
 		lapic_et.et_max_period = SBT_1S;
 		lapic_et.et_start = lapic_et_start;
 		lapic_et.et_stop = lapic_et_stop;
 		lapic_et.et_priv = NULL;
 		et_register(&lapic_et);
 	}
 
 	/*
 	 * Set lapic_eoi_suppression after lapic_enable(), to not
 	 * enable suppression in the hardware prematurely.  Note that
 	 * we by default enable suppression even when system only has
 	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
 	 * including CPUs, otherwise.
 	 *
 	 * It seems that at least some KVM versions report
 	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
 	 */
 	ver = lapic_read32(LAPIC_VERSION);
 	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
 		lapic_eoi_suppression = 1;
 		if (vm_guest == VM_GUEST_KVM) {
 			if (bootverbose)
 				printf(
 		       "KVM -- disabling lapic eoi suppression\n");
 			lapic_eoi_suppression = 0;
 		}
 		/* The tunable has the final say in either case. */
 		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
 		    &lapic_eoi_suppression);
 	}
 
 #ifdef SMP
 #define	LOOPS	100000
 	/*
 	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
 	 * lapic_ipi_wait_mult contains the number of iterations which
 	 * approximately delay execution for 1 microsecond (the
 	 * argument to native_lapic_ipi_wait() is in microseconds).
 	 *
 	 * We assume that TSC is present and already measured.
 	 * Possible TSC frequency jumps are irrelevant to the
 	 * calibration loop below, the CPU clock management code is
 	 * not yet started, and we do not enter sleep states.
 	 */
 	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
 	    ("TSC not initialized"));
 	if (!x2apic_mode) {
 		r = rdtsc();
 		for (rx = 0; rx < LOOPS; rx++) {
 			(void)lapic_read_icr_lo();
 			ia32_pause();
 		}
 		/* r: TSC ticks consumed by LOOPS iterations. */
 		r = rdtsc() - r;
 		r1 = tsc_freq * LOOPS;
 		r2 = r * 1000000;
 		/* Iterations per microsecond, never less than 1. */
 		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
 		if (bootverbose) {
 			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
 			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
 			    (uintmax_t)r, (uintmax_t)tsc_freq);
 		}
 	}
 #undef LOOPS
 #endif /* SMP */
 }
 
 /*
  * Create a local APIC instance.
  *
  * Marks lapics[apic_id] present, seeds its LVT and extended LVT entries
  * from the global defaults (all inactive), frees every I/O interrupt
  * slot and then reserves the slots of the fixed vectors.  An ID above
  * max_apic_id is ignored with a message; ignoring the boot CPU panics.
  * On SMP kernels the CPU is also handed to cpu_add().
  */
 static void
 native_lapic_create(u_int apic_id, int boot_cpu)
 {
 	int i;
 
 	if (apic_id > max_apic_id) {
 		/* apic_id is unsigned; print it as such. */
 		printf("APIC: Ignoring local APIC with ID %u\n", apic_id);
 		if (boot_cpu)
 			panic("Can't ignore BSP");
 		return;
 	}
 	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
 	    apic_id));
 
 	/*
 	 * Assume no local LVT overrides and a cluster of 0 and
 	 * intra-cluster ID of 0.
 	 */
 	lapics[apic_id].la_present = 1;
 	lapics[apic_id].la_id = apic_id;
 	for (i = 0; i <= APIC_LVT_MAX; i++) {
 		lapics[apic_id].la_lvts[i] = lvts[i];
 		lapics[apic_id].la_lvts[i].lvt_active = 0;
 	}
 	for (i = 0; i <= APIC_ELVT_MAX; i++) {
 		lapics[apic_id].la_elvts[i] = elvts[i];
 		lapics[apic_id].la_elvts[i].lvt_active = 0;
 	}
 	for (i = 0; i <= APIC_NUM_IOINTS; i++)
 		lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
 
 	/* Reserve the slots of vectors that are not available to I/O IRQs. */
 	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
 	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
 	    IRQ_TIMER;
 #ifdef KDTRACE_HOOKS
 	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
 	    IRQ_DTRACE_RET;
 #endif
 #ifdef XENHVM
 	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
 #endif
 
 #ifdef SMP
 	cpu_add(apic_id, boot_cpu);
 #endif
 }
 
 /*
  * Return the contents of the extended APIC features register, or 0 when
  * the CPU vendor check fails or the version register does not advertise
  * APIC_VER_AMD_EXT_SPACE.
  */
 static inline uint32_t
 amd_read_ext_features(void)
 {
 	uint32_t version;
 
-	if (cpu_vendor_id != CPU_VENDOR_AMD)
+	if (cpu_vendor_id != CPU_VENDOR_AMD &&
+	    cpu_vendor_id != CPU_VENDOR_HYGON)
 		return (0);
 	version = lapic_read32(LAPIC_VERSION);
 	if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
 		return (lapic_read32(LAPIC_EXT_FEATURES));
 	else
 		return (0);
 }
 
 /*
  * Return the number of extended LVT entries reported by the extended
  * features register, clamped to APIC_ELVT_MAX + 1.
  */
 static inline uint32_t
 amd_read_elvt_count(void)
 {
 	uint32_t nelvt;
 
 	nelvt = amd_read_ext_features();
 	nelvt = (nelvt & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
 	return (min(nelvt, APIC_ELVT_MAX + 1));
 }
 
 /*
  * Dump contents of local APIC registers
  *
  * Prints the registers of the calling CPU's local APIC, labelled with
  * 'str'.  The PMC and CMCI LVT entries are printed only when the version
  * register reports that many LVT entries; the extended LVT entries only
  * when amd_read_ext_features() returns non-zero.
  */
 static void
 native_lapic_dump(const char* str)
 {
 	uint32_t version;
 	uint32_t maxlvt;
 	uint32_t extf;
 	int elvt_count;
 	int i;
 
 	version = lapic_read32(LAPIC_VERSION);
 	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
 	/* The DFR is not read in x2APIC mode; 0 is printed instead. */
 	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
 	    lapic_read32(LAPIC_ID), version,
 	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
 	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
 		printf(" x2APIC: %d", x2apic_mode);
 	printf("\n  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
 	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
 	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
 	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
 	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
 	    lapic_read32(LAPIC_LVT_ERROR));
 	if (maxlvt >= APIC_LVT_PMC)
 		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
 	printf("\n");
 	if (maxlvt >= APIC_LVT_CMCI)
 		printf("   cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
 	extf = amd_read_ext_features();
 	if (extf != 0) {
 		printf("   AMD ext features: 0x%08x\n", extf);
 		elvt_count = amd_read_elvt_count();
 		for (i = 0; i < elvt_count; i++)
 			printf("   AMD elvt%d: 0x%08x\n", i,
 			    lapic_read32(LAPIC_EXT_LVT0 + i));
 	}
 }
 
 /*
  * With interrupts disabled, switch this CPU's local APIC to x2APIC mode
  * when x2apic_mode is set; otherwise do nothing.
  */
 static void
 native_lapic_xapic_mode(void)
 {
 	register_t flags;
 
 	flags = intr_disable();
 	if (x2apic_mode != 0)
 		native_lapic_enable_x2apic();
 	intr_restore(flags);
 }
 
 static void
 native_lapic_setup(int boot)
 {
 	struct lapic *la;
 	uint32_t version;
 	uint32_t maxlvt;
 	register_t saveintr;
 	int elvt_count;
 	int i;
 
 	saveintr = intr_disable();
 
 	la = &lapics[lapic_id()];
 	KASSERT(la->la_present, ("missing APIC structure"));
 	version = lapic_read32(LAPIC_VERSION);
 	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
 
 	/* Setup spurious vector and enable the local APIC. */
 	lapic_enable();
 
 	/* Program LINT[01] LVT entries. */
 	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
 	    lapic_read32(LAPIC_LVT_LINT0)));
 	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
 	    lapic_read32(LAPIC_LVT_LINT1)));
 
 	/* Program the PMC LVT entry if present. */
 	if (maxlvt >= APIC_LVT_PMC) {
 		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
 		    LAPIC_LVT_PCINT));
 	}
 
 	/* Program timer LVT. */
 	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
 	    lapic_read32(LAPIC_LVT_TIMER));
 	la->lvt_timer_last = la->lvt_timer_base;
 	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);
 
 	/* Calibrate the timer parameters using BSP. */
 	if (boot && IS_BSP()) {
 		lapic_calibrate_initcount(la);
 		if (lapic_timer_tsc_deadline)
 			lapic_calibrate_deadline(la);
 	}
 
 	/* Setup the timer if configured. */
 	if (la->la_timer_mode != LAT_MODE_UNDEF) {
 		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
 		    lapic_id()));
 		switch (la->la_timer_mode) {
 		case LAT_MODE_PERIODIC:
 			lapic_timer_set_divisor(lapic_timer_divisor);
 			lapic_timer_periodic(la);
 			break;
 		case LAT_MODE_ONESHOT:
 			lapic_timer_set_divisor(lapic_timer_divisor);
 			lapic_timer_oneshot(la);
 			break;
 		case LAT_MODE_DEADLINE:
 			lapic_timer_deadline(la);
 			break;
 		default:
 			panic("corrupted la_timer_mode %p %d", la,
 			    la->la_timer_mode);
 		}
 	}
 
 	/* Program error LVT and clear any existing errors. */
 	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
 	    lapic_read32(LAPIC_LVT_ERROR)));
 	lapic_write32(LAPIC_ESR, 0);
 
 	/* XXX: Thermal LVT */
 
 	/* Program the CMCI LVT entry if present. */
 	if (maxlvt >= APIC_LVT_CMCI) {
 		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
 		    lapic_read32(LAPIC_LVT_CMCI)));
 	}
 
 	elvt_count = amd_read_elvt_count();
 	for (i = 0; i < elvt_count; i++) {
 		if (la->la_elvts[i].lvt_active)
 			lapic_write32(LAPIC_EXT_LVT0 + i,
 			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
 	}
 
 	intr_restore(saveintr);
 }
 
 /*
  * SYSINIT hook: register a "cpuN:timer" interrupt counter for every CPU
  * whose local APIC structure is marked present.
  */
 static void
 native_lapic_intrcnt(void *dummy __unused)
 {
 	char name[MAXCOMLEN + 1];
 	struct lapic *la;
 	struct pcpu *pc;
 
 	/* Nothing to do when no local APICs were ever allocated. */
 	if (lapics == NULL)
 		return;
 
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 		la = &lapics[pc->pc_apic_id];
 		if (la->la_present) {
 			snprintf(name, sizeof(name), "cpu%d:timer",
 			    pc->pc_cpuid);
 			intrcnt_add(name, &la->la_timer_count);
 		}
 	}
 }
 SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt,
     NULL);
 
 /*
  * Clear the mask bit of the PMC LVT entry on the calling CPU.  Does
  * nothing on kernels built without HWPMC_HOOKS.
  */
 static void
 native_lapic_reenable_pmc(void)
 {
 #ifdef HWPMC_HOOKS
 
 	lapic_write32(LAPIC_LVT_PCINT,
 	    lapic_read32(LAPIC_LVT_PCINT) & ~APIC_LVT_M);
 #endif
 }
 
 #ifdef HWPMC_HOOKS
 static void
 lapic_update_pmc(void *dummy)
 {
 	struct lapic *la;
 
 	la = &lapics[lapic_id()];
 	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
 	    lapic_read32(LAPIC_LVT_PCINT)));
 }
 #endif
 
 /*
  * Unmask the PMC LVT entry in the global defaults and push the change
  * to the hardware.  Returns 1 on success and 0 when there is no usable
  * local APIC, no PMC LVT entry, or the kernel lacks HWPMC_HOOKS.
  */
 static int
 native_lapic_enable_pmc(void)
 {
 #ifdef HWPMC_HOOKS
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
 	if (!x2apic_mode && lapic_map == NULL)
 		return (0);
 
 	/* Fail if the PMC LVT is not present. */
 	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < APIC_LVT_PMC)
 		return (0);
 
 	lvts[APIC_LVT_PMC].lvt_masked = 0;
 
 #ifdef EARLY_AP_STARTUP
 	MPASS(mp_ncpus == 1 || smp_started);
 	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
 #else
 #ifdef SMP
 	/*
 	 * If hwpmc was loaded at boot time then the APs may not be
 	 * started yet.  In that case, don't forward the request to
 	 * them as they will program the lvt when they start.
 	 */
 	if (smp_started)
 		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
 	else
 #endif
 		/* Only this CPU is updated (sole statement on non-SMP). */
 		lapic_update_pmc(NULL);
 #endif
 	return (1);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Mask the PMC LVT entry in the global defaults and reprogram it via
  * smp_rendezvous().  Does nothing when there is no usable local APIC,
  * no PMC LVT entry, or the kernel lacks HWPMC_HOOKS.
  */
 static void
 native_lapic_disable_pmc(void)
 {
 #ifdef HWPMC_HOOKS
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
 	if (!x2apic_mode && lapic_map == NULL)
 		return;
 
 	/* Fail if the PMC LVT is not present. */
 	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < APIC_LVT_PMC)
 		return;
 
 	lvts[APIC_LVT_PMC].lvt_masked = 1;
 
 #ifdef SMP
 	/* The APs should always be started when hwpmc is unloaded. */
 	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
 #endif
 	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
 #endif
 }
 
 /*
  * Measure how far the local APIC timer counts during a DELAY(1000000)
  * and store the result in count_freq.  The divisor starts at 2 and is
  * doubled each time the counter runs all the way down from
  * APIC_TIMER_MAX_COUNT; panics if no divisor up to 128 is sufficient.
  */
 static void
 lapic_calibrate_initcount(struct lapic *la)
 {
 	u_long value;
 
 	/* Start off with a divisor of 2 (power on reset default). */
 	lapic_timer_divisor = 2;
 	/* Try to calibrate the local APIC timer. */
 	do {
 		lapic_timer_set_divisor(lapic_timer_divisor);
 		/* Masked one-shot: count down without raising an interrupt. */
 		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
 		DELAY(1000000);
 		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
 		/* A full countdown means the counter expired: retry slower. */
 		if (value != APIC_TIMER_MAX_COUNT)
 			break;
 		lapic_timer_divisor <<= 1;
 	} while (lapic_timer_divisor <= 128);
 	if (lapic_timer_divisor > 128)
 		panic("lapic: Divisor too big");
 	if (bootverbose) {
 		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
 		    lapic_timer_divisor, value);
 	}
 	count_freq = value;
 }
 
 /*
  * TSC-deadline mode needs no measurement of its own; this only reports
  * the TSC frequency on verbose boots.
  */
 static void
 lapic_calibrate_deadline(struct lapic *la __unused)
 {
 
 	if (!bootverbose)
 		return;
 	printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
 	    (uintmax_t)tsc_freq);
 }
 
 /*
  * Switch 'la' to timer mode 'newmode' and refresh the frequency and
  * period limits of the event timer.  Returns at once when the mode is
  * unchanged; panics on an unknown mode.
  */
 static void
 lapic_change_mode(struct eventtimer *et, struct lapic *la,
     enum lat_timer_mode newmode)
 {
 
 	if (newmode == la->la_timer_mode)
 		return;
 
 	switch (newmode) {
 	case LAT_MODE_PERIODIC:
 	case LAT_MODE_ONESHOT:
 		/* Both counter-based modes run off the calibrated divisor. */
 		lapic_timer_set_divisor(lapic_timer_divisor);
 		et->et_frequency = count_freq;
 		break;
 	case LAT_MODE_DEADLINE:
 		et->et_frequency = tsc_freq;
 		break;
 	default:
 		panic("lapic_change_mode %d", newmode);
 	}
 
 	la->la_timer_mode = newmode;
 	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
 	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
 }
 
 /*
  * Event timer start method.  A non-zero 'period' selects periodic mode;
  * otherwise 'first' is programmed either as a TSC deadline (when
  * lapic_timer_tsc_deadline is set) or as a one-shot count.  Always
  * returns 0.
  *
  * NOTE(review): the ">> 32" presumably converts an sbintime_t fixed-point
  * product into timer ticks -- confirm against the sbintime_t definition.
  */
 static int
 lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
 {
 	struct lapic *la;
 
 	la = &lapics[PCPU_GET(apic_id)];
 	if (period != 0) {
 		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
 		/* The frequency is truncated to 32 bits before multiplying. */
 		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
 		    32;
 		lapic_timer_periodic(la);
 	} else if (lapic_timer_tsc_deadline) {
 		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
 		/* No truncation here: the full et_frequency is used. */
 		la->la_timer_period = (et->et_frequency * first) >> 32;
 		lapic_timer_deadline(la);
 	} else {
 		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
 		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
 		    32;
 		lapic_timer_oneshot(la);
 	}
 	return (0);
 }
 
 /*
  * Event timer stop method: stop the calling CPU's local APIC timer and
  * mark its mode undefined.  Always returns 0.
  */
 static int
 lapic_et_stop(struct eventtimer *et)
 {
 	struct lapic *cur;
 
 	cur = &lapics[PCPU_GET(apic_id)];
 	lapic_timer_stop(cur);
 	cur->la_timer_mode = LAT_MODE_UNDEF;
 
 	return (0);
 }
 
 /*
  * Software-disable the local APIC by clearing the enable bit in the
  * spurious vector register.
  */
 static void
 native_lapic_disable(void)
 {
 
 	lapic_write32(LAPIC_SVR, lapic_read32(LAPIC_SVR) & ~APIC_SVR_SWEN);
 }
 
 /*
  * Enable the local APIC by programming the spurious vector register:
  * the old vector and focus bits are replaced with the FEN and SWEN bits
  * plus APIC_SPURIOUS_INT, and EOI suppression is requested when
  * lapic_eoi_suppression is set.
  */
 static void
 lapic_enable(void)
 {
 	uint32_t svr;
 
 	svr = lapic_read32(LAPIC_SVR) & ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
 	svr |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
 	if (lapic_eoi_suppression != 0)
 		svr |= APIC_SVR_EOI_SUPPRESSION;
 	lapic_write32(LAPIC_SVR, svr);
 }
 
 /*
  * Reset the local APIC on the BSP during resume.
  *
  * Installed as lapic_pic.pic_resume.  Both arguments are ignored; the
  * APIC is reprogrammed through lapic_setup(0), i.e. without the
  * boot-time timer calibration.
  */
 static void
 lapic_resume(struct pic *pic, bool suspend_cancelled)
 {
 
 	lapic_setup(0);
 }
 
 /*
  * Return the APIC ID of the calling CPU.  In xAPIC mode the ID is
  * extracted by shifting the register right by APIC_ID_SHIFT; in x2APIC
  * mode the register value is returned as is.
  */
 static int
 native_lapic_id(void)
 {
 	uint32_t id;
 
 	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
 	id = lapic_read32(LAPIC_ID);
 	return (x2apic_mode ? id : id >> APIC_ID_SHIFT);
 }
 
 /*
  * Return non-zero when 'vector' is set in the interrupt request
  * register of the calling CPU's local APIC, and 0 otherwise.
  */
 static int
 native_lapic_intr_pending(u_int vector)
 {
 	uint32_t irr;
 
 	/*
 	 * The IRR registers are an array of registers each of which
 	 * only describes 32 interrupts in the low 32 bits.  Thus, we
 	 * divide the vector by 32 to get the register index.
 	 * Finally, we modulus the vector by 32 to determine the
 	 * individual bit to test.
 	 */
 	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
 	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
 	return (irr & (1u << (vector % 32)));
 }
 
 /*
  * Record the logical cluster and intra-cluster ID for the local APIC
  * 'apic_id'.  Only the software state is updated here.
  */
 static void
 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
 {
 	struct lapic *target;
 
 	target = &lapics[apic_id];
 	KASSERT(target->la_present, ("%s: APIC %u doesn't exist",
 	    __func__, apic_id));
 	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
 	    __func__, cluster));
 	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
 	    ("%s: intra cluster id %u too big", __func__, cluster_id));
 
 	target->la_cluster = cluster;
 	target->la_cluster_id = cluster_id;
 }
 
 /*
  * Set the software mask state of LVT entry 'pin'.  With APIC_ID_ALL the
  * global default is changed; otherwise the per-APIC entry is changed
  * and marked active.  Returns EINVAL for an out-of-range pin, else 0.
  */
 static int
 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
 {
 	struct lvt *entry;
 
 	if (pin > APIC_LVT_MAX)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_masked = masked;
 		entry->lvt_active = 1;
 		if (bootverbose)
 			printf("lapic%u:", apic_id);
 	} else {
 		lvts[pin].lvt_masked = masked;
 		if (bootverbose)
 			printf("lapic:");
 	}
 
 	if (bootverbose)
 		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
 	return (0);
 }
 
 /*
  * Set the delivery mode of LVT entry 'pin'.  With APIC_ID_ALL the global
  * default is changed; otherwise the per-APIC entry is changed and marked
  * active.  Only NMI, SMI, INIT and ExtINT are accepted; any other mode
  * panics.  Returns EINVAL for an out-of-range pin, else 0.
  */
 static int
 native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
 {
 	struct lvt *lvt;
 
 	if (pin > APIC_LVT_MAX)
 		return (EINVAL);
 	if (apic_id == APIC_ID_ALL) {
 		lvt = &lvts[pin];
 		if (bootverbose)
 			printf("lapic:");
 	} else {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		lvt = &lapics[apic_id].la_lvts[pin];
 		lvt->lvt_active = 1;
 		if (bootverbose)
 			printf("lapic%u:", apic_id);
 	}
 	lvt->lvt_mode = mode;
 	switch (mode) {
 	case APIC_LVT_DM_NMI:
 	case APIC_LVT_DM_SMI:
 	case APIC_LVT_DM_INIT:
 	case APIC_LVT_DM_EXTINT:
 		/* These modes are recorded as edge triggered, active high. */
 		lvt->lvt_edgetrigger = 1;
 		lvt->lvt_activehi = 1;
 		/* ExtINT starts out masked; the other modes unmasked. */
 		if (mode == APIC_LVT_DM_EXTINT)
 			lvt->lvt_masked = 1;
 		else
 			lvt->lvt_masked = 0;
 		break;
 	default:
 		panic("Unsupported delivery mode: 0x%x\n", mode);
 	}
 	if (bootverbose) {
 		printf(" Routing ");
 		switch (mode) {
 		case APIC_LVT_DM_NMI:
 			printf("NMI");
 			break;
 		case APIC_LVT_DM_SMI:
 			printf("SMI");
 			break;
 		case APIC_LVT_DM_INIT:
 			printf("INIT");
 			break;
 		case APIC_LVT_DM_EXTINT:
 			printf("ExtINT");
 			break;
 		}
 		printf(" -> LINT%u\n", pin);
 	}
 	return (0);
 }
 
 /*
  * Set the software polarity of LVT entry 'pin'.  With APIC_ID_ALL the
  * global default is changed; otherwise the per-APIC entry is changed
  * and marked active.  Returns EINVAL for an out-of-range pin or for
  * INTR_POLARITY_CONFORM, else 0.
  */
 static int
 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
 {
 	struct lvt *entry;
 
 	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_active = 1;
 		entry->lvt_activehi = (pol == INTR_POLARITY_HIGH);
 		if (bootverbose)
 			printf("lapic%u:", apic_id);
 	} else {
 		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
 		if (bootverbose)
 			printf("lapic:");
 	}
 
 	if (bootverbose)
 		printf(" LINT%u polarity: %s\n", pin,
 		    pol == INTR_POLARITY_HIGH ? "high" : "low");
 	return (0);
 }
 
 /*
  * Set the software trigger mode of LVT entry 'pin'.  With APIC_ID_ALL
  * the global default is changed; otherwise the per-APIC entry is
  * changed and marked active.  Returns EINVAL for an out-of-range pin or
  * for INTR_TRIGGER_CONFORM, else 0.
  */
 static int
 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
      enum intr_trigger trigger)
 {
 	struct lvt *entry;
 
 	if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
 		entry->lvt_active = 1;
 		if (bootverbose)
 			printf("lapic%u:", apic_id);
 	} else {
 		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
 		if (bootverbose)
 			printf("lapic:");
 	}
 
 	if (bootverbose)
 		printf(" LINT%u trigger: %s\n", pin,
 		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
 	return (0);
 }
 
 /*
  * Adjust the TPR of the current CPU so that it blocks all interrupts below
  * the passed in vector.
  *
  * With CHEAP_TPR the vector is written directly; otherwise the register
  * is read first and only its APIC_TPR_PRIO field is replaced.
  */
 static void
 lapic_set_tpr(u_int vector)
 {
 #ifdef CHEAP_TPR
 	lapic_write32(LAPIC_TPR, vector);
 #else
 	uint32_t tpr;
 
 	/* Keep every bit outside the priority field as it was. */
 	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
 	tpr |= vector;
 	lapic_write32(LAPIC_TPR, tpr);
 #endif
 }
 
 /*
  * Signal end-of-interrupt by writing 0 to the EOI register.  The
  * no-fence write variant is used, so no fences precede the MSR write in
  * x2APIC mode.
  */
 static void
 native_lapic_eoi(void)
 {
 
 	lapic_write32_nofence(LAPIC_EOI, 0);
 }
 
 void
 lapic_handle_intr(int vector, struct trapframe *frame)
 {
 	struct intsrc *isrc;
 
 	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
 	    vector));
 	intr_execute_handlers(isrc, frame);
 }
 
 /*
  * Local APIC timer interrupt handler.  Acknowledges the interrupt, bumps
  * this CPU's timer counter and, when the LAPIC event timer is active,
  * invokes its event callback with 'frame' installed as the current
  * thread's interrupt frame.
  */
 void
 lapic_handle_timer(struct trapframe *frame)
 {
 	struct lapic *la;
 	struct trapframe *oldframe;
 	struct thread *td;
 
 	/* Send EOI first thing. */
 	lapic_eoi();
 
 #if defined(SMP) && !defined(SCHED_ULE)
 	/*
 	 * Don't do any accounting for the disabled HTT cores, since it
 	 * will provide misleading numbers for the userland.
 	 *
 	 * No locking is necessary here, since even if we lose the race
 	 * when hlt_cpus_mask changes it is not a big deal, really.
 	 *
 	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
 	 * and unlike other schedulers it actually schedules threads to
 	 * those CPUs.
 	 */
 	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
 		return;
 #endif
 
 	/* Look up our local APIC structure for the tick counters. */
 	la = &lapics[PCPU_GET(apic_id)];
 	(*la->la_timer_count)++;
 	critical_enter();
 	if (lapic_et.et_active) {
 		td = curthread;
 		/* Save and restore the previous frame around the callback. */
 		td->td_intr_nesting_level++;
 		oldframe = td->td_intr_frame;
 		td->td_intr_frame = frame;
 		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
 		td->td_intr_frame = oldframe;
 		td->td_intr_nesting_level--;
 	}
 	critical_exit();
 }
 
 /*
  * Program the timer divide configuration register.  'divisor' must be a
  * power of two; its bit position selects the encoding from
  * lapic_timer_divisors[].
  */
 static void
 lapic_timer_set_divisor(u_int divisor)
 {
 	int idx;
 
 	idx = ffs(divisor);
 	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
 	KASSERT(idx <= nitems(lapic_timer_divisors),
 		("lapic: invalid divisor %u", divisor));
 	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[idx - 1]);
 }
 
 /*
  * Arm the timer in unmasked one-shot mode with la_timer_period as the
  * initial count.  The LVT is written before the initial count register.
  */
 static void
 lapic_timer_oneshot(struct lapic *la)
 {
 	uint32_t lvt;
 
 	lvt = (la->lvt_timer_base & ~(APIC_LVTT_TM | APIC_LVT_M)) |
 	    APIC_LVTT_TM_ONE_SHOT;
 	la->lvt_timer_last = lvt;
 	lapic_write32(LAPIC_LVT_TIMER, lvt);
 	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
 }
 
 /*
  * Arm the timer in one-shot mode with the LVT entry masked, so the
  * counter runs down from 'count' without delivering an interrupt.
  */
 static void
 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
 {
 	uint32_t lvt;
 
 	lvt = (la->lvt_timer_base & ~APIC_LVTT_TM) |
 	    APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
 	la->lvt_timer_last = lvt;
 	lapic_write32(LAPIC_LVT_TIMER, lvt);
 	lapic_write32(LAPIC_ICR_TIMER, count);
 }
 
 /*
  * Arm the timer in unmasked periodic mode with la_timer_period as the
  * initial count.  The LVT is written before the initial count register.
  */
 static void
 lapic_timer_periodic(struct lapic *la)
 {
 	uint32_t lvt;
 
 	lvt = (la->lvt_timer_base & ~(APIC_LVTT_TM | APIC_LVT_M)) |
 	    APIC_LVTT_TM_PERIODIC;
 	la->lvt_timer_last = lvt;
 	lapic_write32(LAPIC_LVT_TIMER, lvt);
 	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
 }
 
 /*
  * Arm the timer in TSC-deadline mode: the deadline MSR is set to the
  * current TSC plus la_timer_period.  The timer LVT is rewritten only
  * when it differs from the value last written.
  */
 static void
 lapic_timer_deadline(struct lapic *la)
 {
 	uint32_t value;
 
 	value = la->lvt_timer_base;
 	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
 	value |= APIC_LVTT_TM_TSCDLT;
 	if (value != la->lvt_timer_last) {
 		la->lvt_timer_last = value;
 		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
 		/*
 		 * NOTE(review): presumably orders the memory-mapped LVT
 		 * store ahead of the deadline MSR write below -- confirm.
 		 */
 		if (!x2apic_mode)
 			mfence();
 	}
 	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
 }
 
 /*
  * Stop the timer.  In deadline mode the deadline MSR is zeroed and an
  * mfence follows; in the other modes the timer LVT entry is masked.
  */
 static void
 lapic_timer_stop(struct lapic *la)
 {
 	uint32_t lvt;
 
 	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
 		wrmsr(MSR_TSC_DEADLINE, 0);
 		mfence();
 		return;
 	}
 
 	lvt = (la->lvt_timer_base & ~APIC_LVTT_TM) | APIC_LVT_M;
 	la->lvt_timer_last = lvt;
 	lapic_write32(LAPIC_LVT_TIMER, lvt);
 }
 
 /*
  * CMC interrupt handler: acknowledge the interrupt first, then hand off
  * to cmc_intr().
  */
 void
 lapic_handle_cmc(void)
 {
 
 	lapic_eoi();
 	cmc_intr();
 }
 
 /*
  * Called from the mca_init() to activate the CMC interrupt if this CPU is
  * responsible for monitoring any MC banks for CMC events.  Since mca_init()
  * is called prior to lapic_setup() during boot, this just needs to unmask
  * this CPU's LVT_CMCI entry.
  */
 static void
 native_lapic_enable_cmc(void)
 {
 	u_int apic_id;
 
 #ifdef DEV_ATPIC
 	if (!x2apic_mode && lapic_map == NULL)
 		return;
 #endif
 	apic_id = PCPU_GET(apic_id);
 	KASSERT(lapics[apic_id].la_present,
 	    ("%s: missing APIC %u", __func__, apic_id));
 	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
 	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
 	if (bootverbose)
 		printf("lapic%u: CMCI unmasked\n", apic_id);
 }
 
 /*
  * Activate the extended LVT entry used for MCE thresholding on the
  * calling CPU.
  *
  * Returns APIC_ELVT_MCA on success (including when the hardware entry
  * is already unmasked) and -1 when there is no usable local APIC or the
  * CPU does not report enough extended LVT entries.  Only the software
  * state is changed here.
  */
 static int
 native_lapic_enable_mca_elvt(void)
 {
 	u_int apic_id;
 	uint32_t value;
 	int elvt_count;
 
 #ifdef DEV_ATPIC
 	/*
 	 * lapic_map is NULL in x2APIC mode as well, so it only indicates
 	 * a missing local APIC when x2APIC mode is off.  This matches the
 	 * check in native_lapic_enable_cmc().
 	 */
 	if (!x2apic_mode && lapic_map == NULL)
 		return (-1);
 #endif
 
 	apic_id = PCPU_GET(apic_id);
 	KASSERT(lapics[apic_id].la_present,
 	    ("%s: missing APIC %u", __func__, apic_id));
 	elvt_count = amd_read_elvt_count();
 	if (elvt_count <= APIC_ELVT_MCA)
 		return (-1);
 
 	/* An unmasked hardware entry is left as it is. */
 	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
 	if ((value & APIC_LVT_M) == 0) {
 		if (bootverbose)
 			printf("AMD MCE Thresholding Extended LVT is already active\n");
 		return (APIC_ELVT_MCA);
 	}
 	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
 	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
 	if (bootverbose)
 		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
 	return (APIC_ELVT_MCA);
 }
 
 /*
  * Handler for the local APIC error interrupt: latch and read the error
  * status register, print its value together with the current cpuid, and
  * signal end-of-interrupt.
  */
 void
 lapic_handle_error(void)
 {
 	uint32_t esr;
 
 	/*
 	 * Read the contents of the error status register.  Write to
 	 * the register first before reading from it to force the APIC
 	 * to update its value to indicate any errors that have
 	 * occurred since the previous write to the register.
 	 */
 	lapic_write32(LAPIC_ESR, 0);
 	esr = lapic_read32(LAPIC_ESR);
 
 	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
 	lapic_eoi();
 }
 
 /*
  * Look up the entry for 'apic_id' in apic_cpuids[].  Kernels built without
  * SMP always return 0.
  */
 static u_int
 native_apic_cpuid(u_int apic_id)
 {
 #ifndef SMP
 	return (0);
 #else
 	return (apic_cpuids[apic_id]);
 #endif
 }
 
 /* Request a free IDT vector to be used by the specified IRQ. */
 static u_int
 native_apic_alloc_vector(u_int apic_id, u_int irq)
 {
 	u_int result, slot;
 
 	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
 
 	/*
 	 * First-fit scan of this APIC's I/O interrupt slots under icu_lock.
 	 * 'result' stays 0 (meaning "no vector available") unless a free
 	 * slot is claimed for 'irq'.
 	 */
 	result = 0;
 	mtx_lock_spin(&icu_lock);
 	for (slot = 0; slot < APIC_NUM_IOINTS; slot++) {
 		if (lapics[apic_id].la_ioint_irqs[slot] == IRQ_FREE) {
 			lapics[apic_id].la_ioint_irqs[slot] = irq;
 			/* Slots are offset from the first I/O interrupt vector. */
 			result = slot + APIC_IO_INTS;
 			break;
 		}
 	}
 	mtx_unlock_spin(&icu_lock);
 	return (result);
 }
 
 /*
  * Request 'count' free contiguous IDT vectors to be used by 'count'
  * IRQs.  'count' must be a power of two and the vectors will be
  * aligned on a boundary of 'align'.  If the request cannot be
  * satisfied, 0 is returned.
  */
 static u_int
 native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
 {
 	u_int first, run, vector;
 
 	KASSERT(powerof2(count), ("bad count"));
 	KASSERT(powerof2(align), ("bad align"));
 	KASSERT(align >= count, ("align < count"));
 #ifdef INVARIANTS
 	/* Validate every requested IRQ before touching the vector table. */
 	for (run = 0; run < count; run++)
 		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
 		    irqs[run], run));
 #endif
 
 	/*
 	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
 	 * this just uses a simple first fit algorithm.
 	 */
 	/* 'run' is the length of the current free run, 'first' its start slot. */
 	run = 0;
 	first = 0;
 	mtx_lock_spin(&icu_lock);
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
 
 		/* Vector is in use, end run. */
 		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
 			run = 0;
 			first = 0;
 			continue;
 		}
 
 		/* Start a new run if run == 0 and vector is aligned. */
 		if (run == 0) {
 			if ((vector & (align - 1)) != 0)
 				continue;
 			first = vector;
 		}
 		run++;
 
 		/* Keep looping if the run isn't long enough yet. */
 		if (run < count)
 			continue;
 
 		/* Found a run, assign IRQs and return the first vector. */
 		/* 'vector' is reused as the index into irqs[]; the outer loop ends here. */
 		for (vector = 0; vector < count; vector++)
 			lapics[apic_id].la_ioint_irqs[first + vector] =
 			    irqs[vector];
 		mtx_unlock_spin(&icu_lock);
 		return (first + APIC_IO_INTS);
 	}
 	mtx_unlock_spin(&icu_lock);
 	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
 	return (0);
 }
 
 /*
  * Enable a vector for a particular apic_id.  Since all lapics share idt
  * entries and ioint_handlers this enables the vector on all lapics.  lapics
  * which do not have the vector configured would report spurious interrupts
  * should it fire.
  */
 static void
 native_apic_enable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
 	KASSERT(ioint_handlers[vector / 32] != NULL,
 	    ("No ISR handler for vector %u", vector));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 	/*
 	 * The handler tables are indexed by vector / 32; the PTI table is
 	 * used when 'pti' is set.  'apic_id' is not used by this function.
 	 */
 	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
 	    SDT_APIC, SEL_KPL, GSEL_APIC);
 }
 
 /*
  * Disable a vector for a particular apic_id.  With 'notyet' undefined this
  * only performs sanity checks; the IDT entry is left in place.
  */
 static void
 native_apic_disable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 	KASSERT(ioint_handlers[vector / 32] != NULL,
 	    ("No ISR handler for vector %u", vector));
 #ifdef notyet
 	/*
 	 * We can not currently clear the idt entry because other cpus
 	 * may have a valid vector at this offset.
 	 */
 	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
 	    SEL_KPL, GSEL_APIC);
 #endif
 }
 
 /* Release an APIC vector when it's no longer in use. */
 static void
 native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
 {
 	struct thread *td;
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
 	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
 	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
 	    irq, ("IRQ mismatch"));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 
 	/*
 	 * Bind us to the cpu that owned the vector before freeing it so
 	 * we don't lose an interrupt delivery race.
 	 */
 	/* Binding and unbinding are both skipped while 'rebooting' is set. */
 	td = curthread;
 	if (!rebooting) {
 		thread_lock(td);
 		if (sched_is_bound(td))
 			panic("apic_free_vector: Thread already bound.\n");
 		sched_bind(td, apic_cpuid(apic_id));
 		thread_unlock(td);
 	}
 	/* Return the slot to the free pool under icu_lock. */
 	mtx_lock_spin(&icu_lock);
 	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
 	mtx_unlock_spin(&icu_lock);
 	if (!rebooting) {
 		thread_lock(td);
 		sched_unbind(td);
 		thread_unlock(td);
 	}
 }
 
 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
 static u_int
 apic_idt_to_irq(u_int apic_id, u_int vector)
 {
 	int irq;
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 	/* The table is indexed by the vector's offset from APIC_IO_INTS. */
 	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
 	/* Table values that are negative when read as an int are reported as 0. */
 	if (irq < 0)
 		irq = 0;
 	return (irq);
 }
 
 #ifdef DDB
 /*
  * Dump data about APIC IDT vector mappings.
  */
 DB_SHOW_COMMAND(apic, db_show_apic)
 {
 	struct intsrc *isrc;
 	int i, verbose;
 	u_int apic_id;
 	u_int irq;
 
 	/* The "v" and "vv" modifiers select increasing levels of detail. */
 	if (strcmp(modif, "vv") == 0)
 		verbose = 2;
 	else if (strcmp(modif, "v") == 0)
 		verbose = 1;
 	else
 		verbose = 0;
 	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
 		if (lapics[apic_id].la_present == 0)
 			continue;
 		db_printf("Interrupts bound to lapic %u\n", apic_id);
 		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
 			irq = lapics[apic_id].la_ioint_irqs[i];
 			/* Skip free slots and slots holding reserved markers. */
 			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
 				continue;
 #ifdef KDTRACE_HOOKS
 			if (irq == IRQ_DTRACE_RET)
 				continue;
 #endif
 #ifdef XENHVM
 			if (irq == IRQ_EVTCHN)
 				continue;
 #endif
 			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
 			if (irq == IRQ_TIMER)
 				db_printf("lapic timer\n");
 			else if (irq < num_io_irqs) {
 				/* Without verbosity or a known source, print just the IRQ. */
 				isrc = intr_lookup_source(irq);
 				if (isrc == NULL || verbose == 0)
 					db_printf("IRQ %u\n", irq);
 				else
 					db_dump_intr_event(isrc->is_event,
 					    verbose == 2);
 			} else
 				db_printf("IRQ %u ???\n", irq);
 		}
 	}
 }
 
 /*
  * Print the set bits of a 32-bit register word.
  *
  * For every bit i set in 'v', the value base + i is printed in hex on a
  * single line introduced by "prefix:".  Nothing at all is printed when
  * 'v' is zero.
  */
 static void
 dump_mask(const char *prefix, uint32_t v, int base)
 {
 	int i, first;
 
 	first = 1;
 	for (i = 0; i < 32; i++) {
 		/*
 		 * Shift an unsigned 32-bit one: "1 << 31" would shift into
 		 * the sign bit of a signed int, which is undefined.
 		 */
 		if ((v & ((uint32_t)1 << i)) == 0)
 			continue;
 		if (first) {
 			db_printf("%s:", prefix);
 			first = 0;
 		}
 		db_printf(" %02x", base + i);
 	}
 	/* Terminate the line only if something was printed. */
 	if (!first)
 		db_printf("\n");
 }
 
 /* Show info from the lapic regs for this CPU. */
 DB_SHOW_COMMAND(lapic, db_show_lapic)
 {
 	uint32_t v;
 
 	db_printf("lapic ID = %d\n", lapic_id());
 	v = lapic_read32(LAPIC_VERSION);
 	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
 	    v & 0xf);
 	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
 	v = lapic_read32(LAPIC_SVR);
 	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
 	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
 	db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));
 
 /*
  * dump_field(prefix, REGN, index) reads register LAPIC_<REGN><index> and
  * passes it to dump_mask() labelled "<prefix><index>", with a base of
  * index * 32 so that the printed numbers are offset by the word's position.
  */
 #define dump_field(prefix, regn, index)					\
 	dump_mask(__XSTRING(prefix ## index), 				\
 	    lapic_read32(LAPIC_ ## regn ## index),			\
 	    index * 32)
 
 	db_printf("In-service Interrupts:\n");
 	dump_field(isr, ISR, 0);
 	dump_field(isr, ISR, 1);
 	dump_field(isr, ISR, 2);
 	dump_field(isr, ISR, 3);
 	dump_field(isr, ISR, 4);
 	dump_field(isr, ISR, 5);
 	dump_field(isr, ISR, 6);
 	dump_field(isr, ISR, 7);
 
 	db_printf("TMR Interrupts:\n");
 	dump_field(tmr, TMR, 0);
 	dump_field(tmr, TMR, 1);
 	dump_field(tmr, TMR, 2);
 	dump_field(tmr, TMR, 3);
 	dump_field(tmr, TMR, 4);
 	dump_field(tmr, TMR, 5);
 	dump_field(tmr, TMR, 6);
 	dump_field(tmr, TMR, 7);
 
 	db_printf("IRR Interrupts:\n");
 	dump_field(irr, IRR, 0);
 	dump_field(irr, IRR, 1);
 	dump_field(irr, IRR, 2);
 	dump_field(irr, IRR, 3);
 	dump_field(irr, IRR, 4);
 	dump_field(irr, IRR, 5);
 	dump_field(irr, IRR, 6);
 	dump_field(irr, IRR, 7);
 
 #undef dump_field
 }
 #endif
 
 /*
  * APIC probing support code.  This includes code to manage enumerators.
  */
 
 static SLIST_HEAD(, apic_enumerator) enumerators =
 	SLIST_HEAD_INITIALIZER(enumerators);
 static struct apic_enumerator *best_enum;
 
 /*
  * Add 'enumerator' to the head of the list of known APIC enumerators.
  * With INVARIANTS, registering the same enumerator twice panics.
  */
 void
 apic_register_enumerator(struct apic_enumerator *enumerator)
 {
 #ifdef INVARIANTS
 	struct apic_enumerator *cur;
 
 	SLIST_FOREACH(cur, &enumerators, apic_next) {
 		if (cur != enumerator)
 			continue;
 		panic("%s: Duplicate register of %s", __func__,
 		    enumerator->apic_name);
 	}
 #endif
 	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
 }
 
 /*
  * We have to look for CPU's very, very early because certain subsystems
  * want to know how many CPU's we have extremely early on in the boot
  * process.
  */
 static void
 apic_init(void *dummy __unused)
 {
 	struct apic_enumerator *enumerator;
 	int retval, best;
 
 	/* We only support built in local APICs. */
 	if (!(cpu_feature & CPUID_APIC))
 		return;
 
 	/* Don't probe if APIC mode is disabled. */
 	if (resource_disabled("apic", 0))
 		return;
 
 	/* Probe all the enumerators to find the best match. */
 	/*
 	 * A positive probe result rejects the enumerator; among the
 	 * remaining ones the greatest (closest to zero) result wins.
 	 */
 	best_enum = NULL;
 	best = 0;
 	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
 		retval = enumerator->apic_probe();
 		if (retval > 0)
 			continue;
 		if (best_enum == NULL || best < retval) {
 			best_enum = enumerator;
 			best = retval;
 		}
 	}
 	if (best_enum == NULL) {
 		if (bootverbose)
 			printf("APIC: Could not find any APICs.\n");
 #ifndef DEV_ATPIC
 		panic("running without device atpic requires a local APIC");
 #endif
 		return;
 	}
 
 	if (bootverbose)
 		printf("APIC: Using the %s enumerator.\n",
 		    best_enum->apic_name);
 
 #ifdef I686_CPU
 	/*
 	 * To work around an erratum, we disable the local APIC on some
 	 * CPUs during early startup.  We need to turn the local APIC back
 	 * on on such CPUs now.
 	 */
 	ppro_reenable_apic();
 #endif
 
 	/* Probe the CPU's in the system. */
 	/* A probe failure is only reported; best_enum stays selected. */
 	retval = best_enum->apic_probe_cpus();
 	if (retval != 0)
 		printf("%s: Failed to probe CPUs: returned %d\n",
 		    best_enum->apic_name, retval);
 
 }
 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
 
 /*
  * Setup the local APIC.  We have to do this prior to starting up the APs
  * in the SMP case.
  */
 static void
 apic_setup_local(void *dummy __unused)
 {
 	int retval;
 
 	/* No enumerator was selected by apic_init(); nothing to set up. */
 	if (best_enum == NULL)
 		return;
 
 	/* One zeroed entry per possible APIC ID, 0 through max_apic_id. */
 	lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
 	    M_WAITOK | M_ZERO);
 
 	/* Initialize the local APIC. */
 	retval = best_enum->apic_setup_local();
 	if (retval != 0)
 		printf("%s: Failed to setup the local APIC: returned %d\n",
 		    best_enum->apic_name, retval);
 }
 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
 
 /*
  * Setup the I/O APICs.
  */
 static void
 apic_setup_io(void *dummy __unused)
 {
 	int retval;
 
 	/* No enumerator was selected by apic_init(); nothing to set up. */
 	if (best_enum == NULL)
 		return;
 
 	/*
 	 * Local APIC must be registered before other PICs and pseudo PICs
 	 * for proper suspend/resume order.
 	 */
 	intr_register_pic(&lapic_pic);
 
 	/* A failure here is only reported; setup continues regardless. */
 	retval = best_enum->apic_setup_io();
 	if (retval != 0)
 		printf("%s: Failed to setup I/O APICs: returned %d\n",
 		    best_enum->apic_name, retval);
 
 	/*
 	 * Finish setting up the local APIC on the BSP once we know
 	 * how to properly program the LINT pins.  In particular, this
 	 * enables the EOI suppression mode, if LAPIC supports it and
 	 * user did not disable the mode.
 	 */
 	lapic_setup(1);
 	if (bootverbose)
 		lapic_dump("BSP");
 
 	/* Enable the MSI "pic". */
 	init_ops.msi_init();
 
 #ifdef XENHVM
 	xen_intr_alloc_irqs();
 #endif
 }
 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);
 
 #ifdef SMP
 /*
  * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
  * private to the MD code.  The public interface for the rest of the
  * kernel is defined in mp_machdep.c.
  */
 
 /*
  * Wait delay microseconds for IPI to be sent.  If delay is -1, we
  * wait forever.
  */
 static int
 native_lapic_ipi_wait(int delay)
 {
 	uint64_t rx;
 
 	/* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
 	if (x2apic_mode)
 		return (1);
 
 	/*
 	 * Poll the delivery status up to lapic_ipi_wait_mult * delay times
 	 * (without limit when delay is -1).  Returns 1 as soon as the
 	 * status reads idle, 0 if the loop runs out first.
 	 */
 	for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
 		if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
 		    APIC_DELSTAT_IDLE)
 			return (1);
 		ia32_pause();
 	}
 	return (0);
 }
 
 /*
  * Write an IPI request to the interrupt command register (ICR).
  *
  * 'icrlo' supplies the low ICR bits and must not have any reserved bits
  * set.  'dest' is only used when icrlo selects APIC_DEST_DESTFLD.  Outside
  * x2APIC mode the current ICR is read with interrupts disabled and the
  * bits not being programmed are carried over into the new value.
  */
 static void
 native_lapic_ipi_raw(register_t icrlo, u_int dest)
 {
 	uint64_t icr;
 	uint32_t vhi, vlo;
 	register_t saveintr;
 
 	/* XXX: Need more sanity checking of icrlo? */
 	KASSERT(x2apic_mode || lapic_map != NULL,
 	    ("%s called too early", __func__));
 	KASSERT(x2apic_mode ||
 	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
 	    ("%s: invalid dest field", __func__));
 	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
 	    ("%s: reserved bits set in ICR LO register", __func__));
 
 	/* Set destination in ICR HI register if it is being used. */
 	/* 'icr' and 'saveintr' are only set, and only read, outside x2APIC mode. */
 	if (!x2apic_mode) {
 		saveintr = intr_disable();
 		icr = lapic_read_icr();
 	}
 
 	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
 		if (x2apic_mode) {
 			vhi = dest;
 		} else {
 			/* Replace only the ID field of the current high word. */
 			vhi = icr >> 32;
 			vhi &= ~APIC_ID_MASK;
 			vhi |= dest << APIC_ID_SHIFT;
 		}
 	} else {
 		vhi = 0;
 	}
 
 	/* Program the contents of the IPI and dispatch it. */
 	if (x2apic_mode) {
 		vlo = icrlo;
 	} else {
 		/* Keep the current reserved bits and merge in the request. */
 		vlo = icr;
 		vlo &= APIC_ICRLO_RESV_MASK;
 		vlo |= icrlo;
 	}
 	lapic_write_icr(vhi, vlo);
 	if (!x2apic_mode)
 		intr_restore(saveintr);
 }
 
 #define	BEFORE_SPIN	50000
 #ifdef DETECT_DEADLOCK
 #define	AFTER_SPIN	50
 #endif
 
 /*
  * Send an IPI for 'vector' to 'dest'.
  *
  * 'dest' is either one of the APIC_IPI_DEST_* shorthands or a destination
  * value that is passed on to lapic_ipi_raw().  Vectors at or above
  * IPI_NMI_FIRST are sent as NMIs.  Panics if a previous IPI is still
  * pending after the BEFORE_SPIN wait, unless the kernel has already
  * panicked, in which case the function returns without sending.
  */
 static void
 native_lapic_ipi_vectored(u_int vector, int dest)
 {
 	register_t icrlo, destfield;
 
 	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
 	    ("%s: invalid vector %d", __func__, vector));
 
 	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;
 
 	/*
 	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
 	 * regarding NMIs if passed, otherwise specify the vector.
 	 */
 	if (vector >= IPI_NMI_FIRST)
 		icrlo |= APIC_DELMODE_NMI;
 	else
 		icrlo |= vector | APIC_DELMODE_FIXED;
 	/* destfield stays 0 for the shorthand destinations. */
 	destfield = 0;
 	switch (dest) {
 	case APIC_IPI_DEST_SELF:
 		icrlo |= APIC_DEST_SELF;
 		break;
 	case APIC_IPI_DEST_ALL:
 		icrlo |= APIC_DEST_ALLISELF;
 		break;
 	case APIC_IPI_DEST_OTHERS:
 		icrlo |= APIC_DEST_ALLESELF;
 		break;
 	default:
 		KASSERT(x2apic_mode ||
 		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
 		    ("%s: invalid destination 0x%x", __func__, dest));
 		destfield = dest;
 	}
 
 	/* Wait for an earlier IPI to finish. */
 	if (!lapic_ipi_wait(BEFORE_SPIN)) {
 		if (KERNEL_PANICKED())
 			return;
 		else
 			panic("APIC: Previous IPI is stuck");
 	}
 
 	lapic_ipi_raw(icrlo, destfield);
 
 #ifdef DETECT_DEADLOCK
 	/* Wait for IPI to be delivered. */
 	if (!lapic_ipi_wait(AFTER_SPIN)) {
 #ifdef needsattention
 		/*
 		 * XXX FIXME:
 		 *
 		 * The above function waits for the message to actually be
 		 * delivered.  It breaks out after an arbitrary timeout
 		 * since the message should eventually be delivered (at
 		 * least in theory) and that if it wasn't we would catch
 		 * the failure with the check above when the next IPI is
 		 * sent.
 		 *
 		 * We could skip this wait entirely, EXCEPT it probably
 		 * protects us from other routines that assume that the
 		 * message was delivered and acted upon when this function
 		 * returns.
 		 */
 		printf("APIC: IPI might be stuck\n");
 #else /* !needsattention */
 		/* Wait until message is sent without a timeout. */
 		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
 			ia32_pause();
 #endif /* needsattention */
 	}
 #endif /* DETECT_DEADLOCK */
 }
 
 #endif /* SMP */
 
 /*
  * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
  * visible.
  *
  * Consider the case where an IPI is generated immediately after allocation:
  *     vector = lapic_ipi_alloc(ipifunc);
  *     ipi_selected(other_cpus, vector);
  *
  * In xAPIC mode a write to ICR_LO has serializing semantics because the
  * APIC page is mapped as an uncached region. In x2APIC mode there is an
  * explicit 'mfence' before the ICR MSR is written. Therefore in both cases
  * the IDT slot update is globally visible before the IPI is delivered.
  */
 static int
 native_lapic_ipi_alloc(inthand_t *ipifunc)
 {
 	struct gate_descriptor *ip;
 	long func;
 	int idx, vector;
 
 	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
 	    ("invalid ipifunc %p", ipifunc));
 
 	/* -1 is returned when no slot in the dynamic IPI range is free. */
 	vector = -1;
 	mtx_lock_spin(&icu_lock);
 	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
 		ip = &idt[idx];
 		/* Rebuild the handler address from the gate's two offset fields. */
 		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
 		/* A slot is free while it still points at the reserved handler. */
 		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
 		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
 			vector = idx;
 			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
 			break;
 		}
 	}
 	mtx_unlock_spin(&icu_lock);
 	return (vector);
 }
 
 /*
  * Release a dynamically allocated IPI vector by pointing its IDT entry
  * back at the reserved handler (the PTI variant when 'pti' is set).
  * 'vector' must lie in the range IPI_DYN_FIRST..IPI_DYN_LAST.
  */
 static void
 native_lapic_ipi_free(int vector)
 {
 	struct gate_descriptor *ip;
 	long func;
 
 	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
 	    ("%s: invalid vector %d", __func__, vector));
 
 	mtx_lock_spin(&icu_lock);
 	ip = &idt[vector];
 	/* 'func' is only consumed by the KASSERT below. */
 	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
 	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
 	    func != (uintptr_t)&IDTVEC(rsvd_pti),
 	    ("invalid idtfunc %#lx", func));
 	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
 	    SEL_KPL, GSEL_APIC);
 	mtx_unlock_spin(&icu_lock);
 }
Index: head/sys/x86/x86/mca.c
===================================================================
--- head/sys/x86/x86/mca.c	(revision 356939)
+++ head/sys/x86/x86/mca.c	(revision 356940)
@@ -1,1429 +1,1430 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2009 Hudson River Trading LLC
  * Written by: John H. Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Support for x86 machine check architecture.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #ifdef __amd64__
 #define	DEV_APIC
 #else
 #include "opt_apic.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
 /* Modes for mca_scan() */
 enum scan_mode {
 	POLLED,
 	MCE,
 	CMCI,
 };
 
 #ifdef DEV_APIC
 /*
  * State maintained for each monitored MCx bank to control the
  * corrected machine check interrupt threshold.
  */
 struct cmc_state {
 	int	max_threshold;
 	time_t	last_intr;
 };
 
 struct amd_et_state {
 	int	cur_threshold;
 	time_t	last_intr;
 };
 #endif
 
 struct mca_internal {
 	struct mca_record rec;
 	STAILQ_ENTRY(mca_internal) link;
 };
 
 /*
  * Table of functions that each take a bank number and return an
  * unsigned int; the values returned by the status, addr and misc hooks
  * are used as MSR numbers for rdmsr()/wrmsr().
  */
 struct mca_enumerator_ops {
         unsigned int (*ctl)(int);
         unsigned int (*status)(int);
         unsigned int (*addr)(int);
         unsigned int (*misc)(int);
 };
 
 static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
 
 static volatile int mca_count;	/* Number of records stored. */
 static int mca_banks;		/* Number of per-CPU register banks. */
 static int mca_maxcount = -1;	/* Limit on records stored. (-1 = unlimited) */
 
 static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL,
     "Machine Check Architecture");
 
 static int mca_enabled = 1;
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
 static int amd10h_L1TP = 1;
 SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
     "Administrative toggle for logging of level one TLB parity (L1TP) errors");
 
 static int intel6h_HSD131;
 SYSCTL_INT(_hw_mca, OID_AUTO, intel6h_HSD131, CTLFLAG_RDTUN, &intel6h_HSD131, 0,
     "Administrative toggle for logging of spurious corrected errors");
 
 int workaround_erratum383;
 SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN,
     &workaround_erratum383, 0,
     "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
 
 static STAILQ_HEAD(, mca_internal) mca_freelist;
 static int mca_freecount;
 static STAILQ_HEAD(, mca_internal) mca_records;
 static STAILQ_HEAD(, mca_internal) mca_pending;
 static struct callout mca_timer;
 static int mca_ticks = 3600;	/* Check hourly by default. */
 static struct taskqueue *mca_tq;
 static struct task mca_resize_task, mca_scan_task;
 static struct mtx mca_lock;
 
 /*
  * Accessors returning the MSR_MC_* register number for a bank's CTL,
  * STATUS, ADDR and MISC registers.
  */
 static unsigned int
 mca_ia32_ctl_reg(int bank)
 {
 	return (MSR_MC_CTL(bank));
 }
 
 static unsigned int
 mca_ia32_status_reg(int bank)
 {
 	return (MSR_MC_STATUS(bank));
 }
 
 static unsigned int
 mca_ia32_addr_reg(int bank)
 {
 	return (MSR_MC_ADDR(bank));
 }
 
 static unsigned int
 mca_ia32_misc_reg(int bank)
 {
 	return (MSR_MC_MISC(bank));
 }
 
 /*
  * Accessors returning the MSR_SMCA_MC_* register number for a bank's CTL,
  * STATUS, ADDR and MISC registers.
  */
 static unsigned int
 mca_smca_ctl_reg(int bank)
 {
         return (MSR_SMCA_MC_CTL(bank));
 }
 
 static unsigned int
 mca_smca_status_reg(int bank)
 {
         return (MSR_SMCA_MC_STATUS(bank));
 }
 
 static unsigned int
 mca_smca_addr_reg(int bank)
 {
         return (MSR_SMCA_MC_ADDR(bank));
 }
 
 static unsigned int
 mca_smca_misc_reg(int bank)
 {
         return (MSR_SMCA_MC_MISC(bank));
 }
 
 /* Active register-number hooks; statically initialized to the ia32 accessors. */
 static struct mca_enumerator_ops mca_msr_ops = {
         .ctl    = mca_ia32_ctl_reg,
         .status = mca_ia32_status_reg,
         .addr   = mca_ia32_addr_reg,
         .misc   = mca_ia32_misc_reg
 };
 
 #ifdef DEV_APIC
 static struct cmc_state **cmc_state;		/* Indexed by cpuid, bank. */
 static struct amd_et_state **amd_et_state;	/* Indexed by cpuid, bank. */
 static int cmc_throttle = 60;	/* Time in seconds to throttle CMCI. */
 
 static int amd_elvt = -1;
 
 /*
  * Decide from the CPU vendor and family (and, for family 0x17 and later,
  * the AMDRAS_SCALABLE_MCA bit of amd_rascap) whether AMD error
  * thresholding may be used.
  */
 static inline bool
 amd_thresholding_supported(void)
 {
-	if (cpu_vendor_id != CPU_VENDOR_AMD)
+	if (cpu_vendor_id != CPU_VENDOR_AMD &&
+	    cpu_vendor_id != CPU_VENDOR_HYGON)
 		return (false);
 	/*
 	 * The RASCap register is wholly reserved in families 0x10-0x15 (through model 1F).
 	 *
 	 * It begins to be documented in family 0x15 model 30 and family 0x16,
 	 * but neither of these families documents the ScalableMca bit, which
 	 * supposedly defines the presence of this feature on family 0x17.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) >= 0x10 && CPUID_TO_FAMILY(cpu_id) <= 0x16)
 		return (true);
 	if (CPUID_TO_FAMILY(cpu_id) >= 0x17)
 		return ((amd_rascap & AMDRAS_SCALABLE_MCA) != 0);
 	return (false);
 }
 #endif
 
 /*
  * Report whether the CMCI capability bit in 'mcg_cap' can be trusted and
  * is set.
  */
 static inline bool
 cmci_supported(uint64_t mcg_cap)
 {
 	/*
 	 * MCG_CAP_CMCI_P bit is reserved in AMD documentation.  Until
 	 * it is defined, only honour it on Intel CPUs.
 	 */
 	return (cpu_vendor_id == CPU_VENDOR_INTEL &&
 	    (mcg_cap & MCG_CAP_CMCI_P) != 0);
 }
 
 static int
 sysctl_positive_int(SYSCTL_HANDLER_ARGS)
 {
 	int error, value;
 
 	value = *(int *)arg1;
 	error = sysctl_handle_int(oidp, &value, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 	if (value <= 0)
 		return (EINVAL);
 	*(int *)arg1 = value;
 	return (0);
 }
 
 /*
  * Sysctl handler that copies out the stored MCA record whose position in
  * the mca_records list equals the single trailing name component.
  * Returns EINVAL if there is not exactly one component or if the index is
  * outside 0..mca_count-1.
  */
 static int
 sysctl_mca_records(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct mca_record record;
 	struct mca_internal *rec;
 	int i;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	/* Range check made without the lock held. */
 	if (name[0] < 0 || name[0] >= mca_count)
 		return (EINVAL);
 
 	mtx_lock_spin(&mca_lock);
 	/* Re-check the upper bound now that mca_lock is held. */
 	if (name[0] >= mca_count) {
 		mtx_unlock_spin(&mca_lock);
 		return (EINVAL);
 	}
 	/* Walk to the requested entry and copy it while still locked. */
 	i = 0;
 	STAILQ_FOREACH(rec, &mca_records, link) {
 		if (i == name[0]) {
 			record = rec->rec;
 			break;
 		}
 		i++;
 	}
 	mtx_unlock_spin(&mca_lock);
 	return (SYSCTL_OUT(req, &record, sizeof(record)));
 }
 
 /*
  * Decode bits 3:2 of an MCA error code into a one-letter string:
  * 0 -> "I", 1 -> "D", 2 -> "G", anything else -> "?".
  */
 static const char *
 mca_error_ttype(uint16_t mca_error)
 {
 	static const char *const names[] = { "I", "D", "G" };
 	unsigned int tt;
 
 	tt = (mca_error >> 2) & 0x3;
 	if (tt < sizeof(names) / sizeof(names[0]))
 		return (names[tt]);
 	return ("?");
 }
 
 /*
  * Decode bits 1:0 of an MCA error code into a level string:
  * "L0", "L1", "L2" or "LG".
  */
 static const char *
 mca_error_level(uint16_t mca_error)
 {
 	static const char *const names[4] = { "L0", "L1", "L2", "LG" };
 
 	/* The two-bit field always selects one of the four entries. */
 	return (names[mca_error & 0x0003]);
 }
 
 /*
  * Decode bits 7:4 of an MCA error code into a request-type string.
  * Values 0 through 8 have names; every other value yields "???".
  */
 static const char *
 mca_error_request(uint16_t mca_error)
 {
 	static const char *const names[] = {
 		"ERR", "RD", "WR", "DRD", "DWR", "IRD", "PREFETCH", "EVICT",
 		"SNOOP"
 	};
 	unsigned int req;
 
 	req = (mca_error >> 4) & 0xf;
 	if (req < sizeof(names) / sizeof(names[0]))
 		return (names[req]);
 	return ("???");
 }
 
 /*
  * Decode bits 6:4 of an MCA error code into a short type string.
  * Values 0 through 4 have names; every other value yields "???".
  */
 static const char *
 mca_error_mmtype(uint16_t mca_error)
 {
 	static const char *const names[] = { "GEN", "RD", "WR", "AC", "MS" };
 	unsigned int mm;
 
 	mm = (mca_error >> 4) & 0x7;
 	if (mm < sizeof(names) / sizeof(names[0]))
 		return (names[mm]);
 	return ("???");
 }
 
 /*
  * Decide whether a record should be suppressed from logging.
  * Returns 1 to mute the record, 0 to log it.
  */
 static int
 mca_mute(const struct mca_record *rec)
 {
 
 	/*
 	 * Spurious corrected parity errors are generated by Intel Haswell-
 	 * and Broadwell-based CPUs (see the HSD131, HSM142, HSW131 and BDM48
 	 * errata respectively); they are skipped unless reporting is enabled.
 	 * Note that these errors also have been observed with the D0-stepping
 	 * of Haswell, while at least initially the CPU specification updates
 	 * suggested only the C0-stepping to be affected.  Similarly, Celeron
 	 * 2955U with a CPU ID of 0x45 apparently are also concerned with the
 	 * same problem, with HSM142 only referring to 0x3c and 0x46.
 	 */
 	if (intel6h_HSD131)
 		return (0);
 	if (cpu_vendor_id != CPU_VENDOR_INTEL ||
 	    CPUID_TO_FAMILY(cpu_id) != 0x6)
 		return (0);
 	switch (CPUID_TO_MODEL(cpu_id)) {
 	case 0x3c:	/* HSD131, HSM142, HSW131 */
 	case 0x3d:	/* BDM48 */
 	case 0x45:
 	case 0x46:	/* HSM142 */
 		break;
 	default:
 		return (0);
 	}
 	if (rec->mr_bank != 0)
 		return (0);
 	if ((rec->mr_status & 0xa0000000ffffffff) != 0x80000000000f0005)
 		return (0);
 
 	return (1);
 }
 
 /* Dump details about a single machine check. */
 static void
 mca_log(const struct mca_record *rec)
 {
 	uint16_t mca_error;
 
 	/* Records selected by mca_mute() produce no output at all. */
 	if (mca_mute(rec))
 	    	return;
 
 	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
 	    (long long)rec->mr_status);
 	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
 	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
 	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
 	    rec->mr_cpu_id, rec->mr_apic_id);
 	printf("MCA: CPU %d ", rec->mr_cpu);
 	if (rec->mr_status & MC_STATUS_UC)
 		printf("UNCOR ");
 	else {
 		printf("COR ");
 		/* The corrected-error count is printed only when CMCI is supported. */
 		if (cmci_supported(rec->mr_mcg_cap))
 			printf("(%lld) ", ((long long)rec->mr_status &
 			    MC_STATUS_COR_COUNT) >> 38);
 	}
 	if (rec->mr_status & MC_STATUS_PCC)
 		printf("PCC ");
 	if (rec->mr_status & MC_STATUS_OVER)
 		printf("OVER ");
 	/* Decode the error-code field selected by MC_STATUS_MCA_ERROR. */
 	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
 	switch (mca_error) {
 		/* Simple error codes. */
 	case 0x0000:
 		printf("no error");
 		break;
 	case 0x0001:
 		printf("unclassified error");
 		break;
 	case 0x0002:
 		printf("ucode ROM parity error");
 		break;
 	case 0x0003:
 		printf("external error");
 		break;
 	case 0x0004:
 		printf("FRC error");
 		break;
 	case 0x0005:
 		printf("internal parity error");
 		break;
 	case 0x0400:
 		printf("internal timer error");
 		break;
 	default:
 		if ((mca_error & 0xfc00) == 0x0400) {
 			printf("internal error %x", mca_error & 0x03ff);
 			break;
 		}
 
 		/* Compound error codes. */
 		/* The checks below are ordered; the first matching pattern wins. */
 
 		/* Memory hierarchy error. */
 		if ((mca_error & 0xeffc) == 0x000c) {
 			printf("%s memory error", mca_error_level(mca_error));
 			break;
 		}
 
 		/* TLB error. */
 		if ((mca_error & 0xeff0) == 0x0010) {
 			printf("%sTLB %s error", mca_error_ttype(mca_error),
 			    mca_error_level(mca_error));
 			break;
 		}
 
 		/* Memory controller error. */
 		if ((mca_error & 0xef80) == 0x0080) {
 			printf("%s channel ", mca_error_mmtype(mca_error));
 			/* A channel field of 0xf is printed as "??". */
 			if ((mca_error & 0x000f) != 0x000f)
 				printf("%d", mca_error & 0x000f);
 			else
 				printf("??");
 			printf(" memory error");
 			break;
 		}
 		
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
 			printf("%sCACHE %s %s error",
 			    mca_error_ttype(mca_error),
 			    mca_error_level(mca_error),
 			    mca_error_request(mca_error));
 			break;
 		}
 
 		/* Bus and/or Interconnect error. */
 		if ((mca_error & 0xe800) == 0x0800) {			
 			printf("BUS%s ", mca_error_level(mca_error));
 			/* Bits 10:9 select the participation string. */
 			switch ((mca_error & 0x0600) >> 9) {
 			case 0:
 				printf("Source");
 				break;
 			case 1:
 				printf("Responder");
 				break;
 			case 2:
 				printf("Observer");
 				break;
 			default:
 				printf("???");
 				break;
 			}
 			printf(" %s ", mca_error_request(mca_error));
 			/* Bits 3:2 select the Memory / I/O / Other string. */
 			switch ((mca_error & 0x000c) >> 2) {
 			case 0:
 				printf("Memory");
 				break;
 			case 2:
 				printf("I/O");
 				break;
 			case 3:
 				printf("Other");
 				break;
 			default:
 				printf("???");
 				break;
 			}
 			if (mca_error & 0x0100)
 				printf(" timed out");
 			break;
 		}
 
 		printf("unknown error %x", mca_error);
 		break;
 	}
 	printf("\n");
 	/* Address and misc values are printed only when flagged valid. */
 	if (rec->mr_status & MC_STATUS_ADDRV)
 		printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
 	if (rec->mr_status & MC_STATUS_MISCV)
 		printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
 }
 
 /*
  * Read the status MSR of machine check bank 'bank' on the current CPU
  * and, if it reports a valid event, capture the event into 'rec'.
  *
  * Returns 0 if MC_STATUS_VAL is clear (in which case 'rec' is not
  * written at all) and 1 if a record was captured.
  */
 static int
 mca_check_status(int bank, struct mca_record *rec)
 {
 	uint64_t status;
 	u_int p[4];
 
 	status = rdmsr(mca_msr_ops.status(bank));
 	if (!(status & MC_STATUS_VAL))
 		return (0);
 
 	/* Save exception information. */
 	rec->mr_status = status;
 	rec->mr_bank = bank;
 	/* ADDR and MISC are only read when the status says they are valid. */
 	rec->mr_addr = 0;
 	if (status & MC_STATUS_ADDRV)
 		rec->mr_addr = rdmsr(mca_msr_ops.addr(bank));
 	rec->mr_misc = 0;
 	if (status & MC_STATUS_MISCV)
 		rec->mr_misc = rdmsr(mca_msr_ops.misc(bank));
 	rec->mr_tsc = rdtsc();
 	rec->mr_apic_id = PCPU_GET(apic_id);
 	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
 	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
 	rec->mr_cpu_id = cpu_id;
 	rec->mr_cpu_vendor_id = cpu_vendor_id;
 	rec->mr_cpu = PCPU_GET(cpuid);
 
 	/*
 	 * Clear machine check.  Don't do this for uncorrectable
 	 * errors so that the BIOS can see them.
 	 */
 	if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
 		wrmsr(mca_msr_ops.status(bank), 0);
 		/*
 		 * NOTE(review): the CPUID result in p[] is unused; presumably
 		 * CPUID is executed only as a serializing instruction after
 		 * the MSR write -- confirm.
 		 */
 		do_cpuid(0, p);
 	}
 	return (1);
 }
 
 /*
  * Bring the number of preallocated records on mca_freelist into the
  * range [desired_min, desired_max], where desired_min is
  * max(mp_ncpus, mca_banks) and desired_max is twice that.  Surplus
  * records are unlinked under mca_lock and freed after the lock is
  * dropped; missing records are allocated with the lock dropped.
  */
 static void
 mca_resize_freelist(void)
 {
 	struct mca_internal *next, *rec;
 	STAILQ_HEAD(, mca_internal) tmplist;
 	int count, i, desired_max, desired_min;
 
 	/*
 	 * Ensure we have at least one record for each bank and one
 	 * record per CPU, but no more than twice that amount.
 	 */
 	desired_min = imax(mp_ncpus, mca_banks);
 	desired_max = imax(mp_ncpus, mca_banks) * 2;
 	STAILQ_INIT(&tmplist);
 	mtx_lock_spin(&mca_lock);
 	/* Shrink: move surplus records to the private list for later free. */
 	while (mca_freecount > desired_max) {
 		rec = STAILQ_FIRST(&mca_freelist);
 		KASSERT(rec != NULL, ("mca_freecount is %d, but list is empty",
 		    mca_freecount));
 		STAILQ_REMOVE_HEAD(&mca_freelist, link);
 		mca_freecount--;
 		STAILQ_INSERT_TAIL(&tmplist, rec, link);
 	}
 	/*
 	 * Grow: the M_WAITOK allocations are made with the spin lock
 	 * released.  The condition is re-evaluated after the lock is
 	 * retaken because mca_freecount may have changed in the meantime.
 	 */
 	while (mca_freecount < desired_min) {
 		count = desired_min - mca_freecount;
 		mtx_unlock_spin(&mca_lock);
 		for (i = 0; i < count; i++) {
 			rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
 			STAILQ_INSERT_TAIL(&tmplist, rec, link);
 		}
 		mtx_lock_spin(&mca_lock);
 		/* Hands every freshly allocated record to the free list. */
 		STAILQ_CONCAT(&mca_freelist, &tmplist);
 		mca_freecount += count;
 	}
 	mtx_unlock_spin(&mca_lock);
 	/* Release whatever is still on the private list (trimmed records). */
 	STAILQ_FOREACH_SAFE(rec, &tmplist, link, next)
 		free(rec, M_MCA);
 }
 
 /*
  * Task handler wrapper around mca_resize_freelist(); both arguments
  * are ignored.  Installed as the handler of mca_resize_task.
  */
 static void
 mca_resize(void *context, int pending)
 {
 
 	mca_resize_freelist();
 }
 
 /*
  * Append a copy of 'record' to the mca_pending list.
  *
  * In POLLED mode the list entry is allocated with malloc(M_WAITOK)
  * before mca_lock is taken.  In every other mode the entry is taken
  * from mca_freelist; if that list is empty the record is only logged
  * and then dropped.
  */
 static void
 mca_record_entry(enum scan_mode mode, const struct mca_record *record)
 {
 	struct mca_internal *entry;
 
 	if (mode != POLLED) {
 		/* Use a preallocated entry from the free list. */
 		mtx_lock_spin(&mca_lock);
 		entry = STAILQ_FIRST(&mca_freelist);
 		if (entry == NULL) {
 			printf("MCA: Unable to allocate space for an event.\n");
 			mca_log(record);
 			mtx_unlock_spin(&mca_lock);
 			return;
 		}
 		STAILQ_REMOVE_HEAD(&mca_freelist, link);
 		mca_freecount--;
 	} else {
 		/* Allocate first, then take the spin lock. */
 		entry = malloc(sizeof(*entry), M_MCA, M_WAITOK);
 		mtx_lock_spin(&mca_lock);
 	}
 
 	/* mca_lock is held on both paths at this point. */
 	entry->rec = *record;
 	STAILQ_INSERT_TAIL(&mca_pending, entry, link);
 	mtx_unlock_spin(&mca_lock);
 }
 
 #ifdef DEV_APIC
 /*
  * Compute the interrupt threshold to use for a corrected-error counter.
  *
  * The threshold starts low so that the first event interrupts.  While
  * interrupts keep arriving less than cmc_throttle seconds apart with
  * the counter at or above the threshold, the threshold is doubled (up
  * to max_threshold).  The periodic scan lowers it again based on the
  * observed event rate.
  *
  * Returns the new threshold; cur_threshold is returned unchanged when
  * no adjustment applies.
  */
 static int
 update_threshold(enum scan_mode mode, int valid, int last_intr, int count,
     int cur_threshold, int max_threshold)
 {
 	u_int elapsed;
 	int rate;
 
 	/* Seconds since the last interrupt for this bank. */
 	elapsed = (u_int)(time_uptime - last_intr);
 
 	/*
 	 * Interrupt path: if this interrupt followed the previous one
 	 * by less than cmc_throttle seconds and the event count reached
 	 * the current threshold, double the threshold up to the max.
 	 */
 	if (mode == CMCI && valid) {
 		if (elapsed < cmc_throttle && count >= cur_threshold &&
 		    cur_threshold < max_threshold)
 			return (min(cur_threshold << 1, max_threshold));
 		return (cur_threshold);
 	}
 
 	/*
 	 * Only the poller may lower the threshold, and it leaves the
 	 * threshold alone if a CMCI occurred recently.
 	 */
 	if (mode != POLLED || elapsed < cmc_throttle)
 		return (cur_threshold);
 
 	/* Nothing logged in this bank: go back to the lowest threshold. */
 	if (!valid)
 		return (1);
 
 	/*
 	 * Average number of events per cmc_throttle seconds since the
 	 * last interrupt, clamped to [1, max_threshold].
 	 */
 	rate = count * cmc_throttle / elapsed;
 	if (rate <= 0)
 		return (1);
 	if (rate > max_threshold)
 		return (max_threshold);
 	return (rate);
 }
 
 /*
  * Re-evaluate the CMCI threshold of 'bank' on the current CPU and
  * program it into MC_CTL2 if it changed.
  *
  * 'valid' tells whether 'rec' holds a record that was just captured
  * for this bank.  When it is zero the caller has not filled in 'rec',
  * so the record is not read and an event count of zero is passed to
  * update_threshold() (which only uses the count when 'valid' is set).
  */
 static void
 cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec)
 {
 	struct cmc_state *cc;
 	uint64_t ctl;
 	int cur_threshold, new_threshold;
 	int count;
 
 	/* Fetch the current limit for this bank. */
 	cc = &cmc_state[PCPU_GET(cpuid)][bank];
 	ctl = rdmsr(MSR_MC_CTL2(bank));
 	cur_threshold = ctl & MC_CTL2_THRESHOLD;
 
 	/* Only look at the record when it actually describes this bank. */
 	count = 0;
 	if (valid)
 		count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
 
 	new_threshold = update_threshold(mode, valid, cc->last_intr, count,
 	    cur_threshold, cc->max_threshold);
 
 	/* Remember when the last interrupt with a valid event arrived. */
 	if (mode == CMCI && valid)
 		cc->last_intr = time_uptime;
 	if (new_threshold != cur_threshold) {
 		ctl &= ~MC_CTL2_THRESHOLD;
 		ctl |= new_threshold;
 		wrmsr(MSR_MC_CTL2(bank), ctl);
 	}
 }
 
 /*
  * Re-evaluate the AMD error-thresholding limit of 'bank' on the
  * current CPU and reprogram the counter field of the bank's MISC MSR.
  * The hardware counter is loaded with (MC_MISC_AMD_CNT_MAX - threshold),
  * so the number of events seen is the counter value minus that preload.
  */
 static void
 amd_thresholding_update(enum scan_mode mode, int bank, int valid)
 {
 	struct amd_et_state *state;
 	uint64_t reg;
 	int events, limit;
 
 	state = &amd_et_state[PCPU_GET(cpuid)][bank];
 	reg = rdmsr(mca_msr_ops.misc(bank));
 
 	/* Events counted since the counter was last preloaded. */
 	events = (reg & MC_MISC_AMD_CNT_MASK) >> MC_MISC_AMD_CNT_SHIFT;
 	events = events - (MC_MISC_AMD_CNT_MAX - state->cur_threshold);
 
 	limit = update_threshold(mode, valid, state->last_intr, events,
 	    state->cur_threshold, MC_MISC_AMD_CNT_MAX);
 	state->cur_threshold = limit;
 
 	/* Preload the counter for the new limit and clear the overflow. */
 	reg = (reg & ~MC_MISC_AMD_CNT_MASK) |
 	    ((uint64_t)(MC_MISC_AMD_CNT_MAX - state->cur_threshold)
 	    << MC_MISC_AMD_CNT_SHIFT);
 	reg &= ~MC_MISC_AMD_OVERFLOW;
 	wrmsr(mca_msr_ops.misc(bank), reg);
 
 	if (mode == CMCI && valid)
 		state->last_intr = time_uptime;
 }
 #endif
 
 /*
  * This scans all the machine check banks of the current CPU to see if
  * there are any machine checks.  Any non-recoverable errors are
  * reported immediately via mca_log().  The current thread must be
  * pinned when this is called.  The 'mode' parameter indicates if we
  * are being called from the MC exception handler, the CMCI handler,
  * or the periodic poller.
  *
  * The return value is always the number of valid MC records found.
  * If 'recoverablep' is not NULL, *recoverablep is set to 0 when at
  * least one record has a bit of the "uncorrected" mask set (UC or PCC,
  * plus OVER when mode is MCE) and to 1 otherwise.
  */
 static int
 mca_scan(enum scan_mode mode, int *recoverablep)
 {
 	struct mca_record rec;
 	uint64_t mcg_cap, ucmask;
 	int count, i, recoverable, valid;
 
 	count = 0;
 	recoverable = 1;
 	ucmask = MC_STATUS_UC | MC_STATUS_PCC;
 
 	/* When handling a MCE#, treat the OVER flag as non-restartable. */
 	if (mode == MCE)
 		ucmask |= MC_STATUS_OVER;
 	mcg_cap = rdmsr(MSR_MCG_CAP);
 	for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
 #ifdef DEV_APIC
 		/*
 		 * For a CMCI, only check banks this CPU is
 		 * responsible for.
 		 */
 		if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i))
 			continue;
 #endif
 
 		valid = mca_check_status(i, &rec);
 		if (valid) {
 			count++;
 			if (rec.mr_status & ucmask) {
 				recoverable = 0;
 				mtx_lock_spin(&mca_lock);
 				mca_log(&rec);
 				mtx_unlock_spin(&mca_lock);
 			}
 			/* Queue the record; it is processed later. */
 			mca_record_entry(mode, &rec);
 		}
 	
 #ifdef DEV_APIC
 		/*
 		 * If this is a bank this CPU monitors via CMCI,
 		 * update the threshold.
 		 *
 		 * Note: when 'valid' is 0, mca_check_status() did not
 		 * write 'rec' for this bank, so its contents are stale
 		 * or uninitialized here.
 		 */
 		if (PCPU_GET(cmci_mask) & 1 << i) {
 			if (cmc_state != NULL)
 				cmci_update(mode, i, valid, &rec);
 			else
 				amd_thresholding_update(mode, i, valid);
 		}
 #endif
 	}
 	if (recoverablep != NULL)
 		*recoverablep = recoverable;
 	return (count);
 }
 
 /*
  * File a processed record on the mca_records list, honouring the
  * mca_maxcount limit:
  *
  *  - mca_maxcount == 0: nothing is stored; the record goes straight
  *    to the free list.
  *  - room left (or mca_maxcount < 0, i.e. no limit): the record is
  *    appended and mca_count is incremented.
  *  - list full: the record is appended and the oldest record is
  *    removed from the head and moved to the free list instead.
  */
 static void
 mca_store_record(struct mca_internal *mca)
 {
 	int storing;
 
 	storing = (mca_maxcount != 0);
 	if (storing)
 		STAILQ_INSERT_TAIL(&mca_records, mca, link);
 
 	if (mca_maxcount >= 0 && mca_count >= mca_maxcount) {
 		/* At the limit: recycle the oldest record (or this one). */
 		if (storing) {
 			mca = STAILQ_FIRST(&mca_records);
 			STAILQ_REMOVE_HEAD(&mca_records, link);
 		}
 		STAILQ_INSERT_TAIL(&mca_freelist, mca, link);
 		mca_freecount++;
 	} else {
 		mca_count++;
 	}
 }
 
 /*
  * Drain the mca_pending list: each record is logged to the console
  * with mca_log() and then handed to mca_store_record().  Afterwards
  * the free list is resized, either directly (POLLED mode) or by
  * queueing mca_resize_task (other modes, unless the system is still
  * cold).
  */
 static void
 mca_process_records(enum scan_mode mode)
 {
 	struct mca_internal *entry;
 
 	mtx_lock_spin(&mca_lock);
 	for (;;) {
 		entry = STAILQ_FIRST(&mca_pending);
 		if (entry == NULL)
 			break;
 		STAILQ_REMOVE_HEAD(&mca_pending, link);
 		mca_log(&entry->rec);
 		mca_store_record(entry);
 	}
 	mtx_unlock_spin(&mca_lock);
 
 	if (mode == POLLED) {
 		mca_resize_freelist();
 		return;
 	}
 	if (!cold)
 		taskqueue_enqueue(mca_tq, &mca_resize_task);
 }
 
 /*
  * Scan the machine check banks on all CPUs by binding to each CPU in
  * turn.  If any of the CPUs contained new machine check records, log
  * them to the console.
  *
  * This is the handler of mca_scan_task; both arguments are ignored.
  */
 static void
 mca_scan_cpus(void *context, int pending)
 {
 	struct thread *td;
 	int count, cpu;
 
 	/* Bring the free list back into its target range first. */
 	mca_resize_freelist();
 	td = curthread;
 	count = 0;
 	/*
 	 * sched_bind()/sched_unbind() are called with the thread lock
 	 * held; the lock is dropped only while the banks are scanned.
 	 */
 	thread_lock(td);
 	CPU_FOREACH(cpu) {
 		sched_bind(td, cpu);
 		thread_unlock(td);
 		count += mca_scan(POLLED, NULL);
 		thread_lock(td);
 		sched_unbind(td);
 	}
 	thread_unlock(td);
 	/* Only process (log and store) if some CPU produced a record. */
 	if (count != 0)
 		mca_process_records(POLLED);
 }
 
 /*
  * Callout handler for mca_timer: queue mca_scan_task on the MCA task
  * queue and re-arm the callout to fire again after mca_ticks * hz
  * ticks.  'arg' is unused.
  */
 static void
 mca_periodic_scan(void *arg)
 {
 
 	taskqueue_enqueue(mca_tq, &mca_scan_task);
 	callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
 }
 
 static int
 sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	i = 0;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error)
 		return (error);
 	if (i)
 		taskqueue_enqueue(mca_tq, &mca_scan_task);
 	return (0);
 }
 
 static int
 sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS)
 {
 	struct mca_internal *mca;
 	int error, i;
 	bool doresize;
 
 	i = mca_maxcount;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 	mtx_lock_spin(&mca_lock);
 	mca_maxcount = i;
 	doresize = false;
 	if (mca_maxcount >= 0)
 		while (mca_count > mca_maxcount) {
 			mca = STAILQ_FIRST(&mca_records);
 			STAILQ_REMOVE_HEAD(&mca_records, link);
 			mca_count--;
 			STAILQ_INSERT_TAIL(&mca_freelist, mca, link);
 			mca_freecount++;
 			doresize = true;
 		}
 	mtx_unlock_spin(&mca_lock);
 	if (doresize && !cold)
 		taskqueue_enqueue(mca_tq, &mca_resize_task);
 	return (error);
 }
 
 /*
  * SYSINIT hook: create the "mca" task queue and start its single
  * thread, then top up the free list.  Does nothing when no machine
  * check banks were found (mca_banks <= 0).
  */
 static void
 mca_createtq(void *dummy)
 {
 	if (mca_banks > 0) {
 		mca_tq = taskqueue_create_fast("mca", M_WAITOK,
 		    taskqueue_thread_enqueue, &mca_tq);
 		taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ),
 		    "mca taskq");
 
 		/*
 		 * CMCIs during boot may have claimed items from the
 		 * freelist.
 		 */
 		mca_resize_freelist();
 	}
 }
 SYSINIT(mca_createtq, SI_SUB_CONFIGURE, SI_ORDER_ANY, mca_createtq, NULL);
 
 /*
  * SYSINIT hook: arm the periodic scan callout for the first time.
  * Does nothing when no machine check banks were found.
  */
 static void
 mca_startup(void *dummy)
 {
 
 	if (mca_banks > 0)
 		callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan,
 		    NULL);
 }
 #ifdef EARLY_AP_STARTUP
 SYSINIT(mca_startup, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, mca_startup, NULL);
 #else
 SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
 #endif
 
 #ifdef DEV_APIC
 /*
  * Allocate the per-CPU, per-bank CMCI state table (mp_maxid + 1 rows
  * of mca_banks zeroed entries) and register the "cmc_throttle" sysctl.
  */
 static void
 cmci_setup(void)
 {
 	int cpu;
 
 	cmc_state = malloc((mp_maxid + 1) * sizeof(*cmc_state), M_MCA,
 	    M_WAITOK);
 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
 		cmc_state[cpu] = malloc(sizeof(*cmc_state[cpu]) * mca_banks,
 		    M_MCA, M_WAITOK | M_ZERO);
 	}
 	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 	    &cmc_throttle, 0, sysctl_positive_int, "I",
 	    "Interval in seconds to throttle corrected MC interrupts");
 }
 
 /*
  * Allocate the per-CPU, per-bank AMD thresholding state table
  * (mp_maxid + 1 rows of mca_banks zeroed entries) and register the
  * "cmc_throttle" sysctl.
  */
 static void
 amd_thresholding_setup(void)
 {
 	u_int cpu;
 
 	amd_et_state = malloc((mp_maxid + 1) * sizeof(*amd_et_state),
 	    M_MCA, M_WAITOK);
 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
 		amd_et_state[cpu] = malloc(sizeof(*amd_et_state[cpu]) *
 		    mca_banks, M_MCA, M_WAITOK | M_ZERO);
 	}
 	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 	    &cmc_throttle, 0, sysctl_positive_int, "I",
 	    "Interval in seconds to throttle corrected MC interrupts");
 }
 #endif
 
 /*
  * One-time initialization of the global machine check state.
  * 'mcg_cap' is the value of MSR_MCG_CAP; its bank-count field
  * determines mca_banks.  Initializes the lock, record lists, tasks and
  * callout, pre-populates the free list, registers the hw.mca sysctls
  * and, with DEV_APIC, sets up either CMCI or AMD thresholding state.
  */
 static void
 mca_setup(uint64_t mcg_cap)
 {
 
 	/*
 	 * On AMD Family 10h processors, unless logging of level one TLB
 	 * parity (L1TP) errors is disabled, enable the recommended workaround
 	 * for Erratum 383.
 	 */
 	if (cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
 		workaround_erratum383 = 1;
 
 	mca_banks = mcg_cap & MCG_CAP_COUNT;
 	/* The lock and lists must exist before mca_resize_freelist(). */
 	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
 	STAILQ_INIT(&mca_records);
 	STAILQ_INIT(&mca_pending);
 	TASK_INIT(&mca_scan_task, 0, mca_scan_cpus, NULL);
 	callout_init(&mca_timer, 1);
 	STAILQ_INIT(&mca_freelist);
 	TASK_INIT(&mca_resize_task, 0, mca_resize, NULL);
 	mca_resize_freelist();
 	/*
 	 * NOTE(review): the (int *)(uintptr_t) cast presumably strips a
 	 * qualifier from mca_count's declaration -- confirm.
 	 */
 	SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0,
 	    "Record count");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "maxcount", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
 	    &mca_maxcount, 0, sysctl_mca_maxcount, "I",
 	    "Maximum record count (-1 is unlimited)");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
 	    0, sysctl_positive_int, "I",
 	    "Periodic interval in seconds to scan for machine checks");
 	SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 	    sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
 #ifdef DEV_APIC
 	/* Intel-style CMCI takes precedence over AMD thresholding. */
 	if (cmci_supported(mcg_cap))
 		cmci_setup();
 	else if (amd_thresholding_supported())
 		amd_thresholding_setup();
 #endif
 }
 
 #ifdef DEV_APIC
 /*
  * See if we should monitor CMCI for this bank.  If CMCI_EN is already
  * set in MC_CTL2, then another CPU is responsible for this bank, so
  * ignore it.  If CMCI_EN returns zero after being set, then this bank
  * does not support CMCI_EN.  If this CPU sets CMCI_EN, then it should
  * now monitor this bank.
  *
  * 'i' is the bank number.  On success the bank's bit is set in this
  * CPU's cmci_mask and the bank's maximum threshold is recorded.
  */
 static void
 cmci_monitor(int i)
 {
 	struct cmc_state *cc;
 	uint64_t ctl;
 
 	KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
 
 	ctl = rdmsr(MSR_MC_CTL2(i));
 	if (ctl & MC_CTL2_CMCI_EN)
 		/* Already monitored by another CPU. */
 		return;
 
 	/* Set the threshold to one event for now. */
 	ctl &= ~MC_CTL2_THRESHOLD;
 	ctl |= MC_CTL2_CMCI_EN | 1;
 	wrmsr(MSR_MC_CTL2(i), ctl);
 	/* Read back to see whether the enable bit stuck. */
 	ctl = rdmsr(MSR_MC_CTL2(i));
 	if (!(ctl & MC_CTL2_CMCI_EN))
 		/* This bank does not support CMCI. */
 		return;
 
 	cc = &cmc_state[PCPU_GET(cpuid)][i];
 
 	/*
 	 * Determine maximum threshold: write 0x7fff into the threshold
 	 * field and record whatever value reads back.
 	 */
 	ctl &= ~MC_CTL2_THRESHOLD;
 	ctl |= 0x7fff;
 	wrmsr(MSR_MC_CTL2(i), ctl);
 	ctl = rdmsr(MSR_MC_CTL2(i));
 	cc->max_threshold = ctl & MC_CTL2_THRESHOLD;
 
 	/* Start off with a threshold of 1. */
 	ctl &= ~MC_CTL2_THRESHOLD;
 	ctl |= 1;
 	wrmsr(MSR_MC_CTL2(i), ctl);
 
 	/* Mark this bank as monitored. */
 	PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i);
 }
 
 /*
  * Resume handling for bank 'i': if this CPU monitors the bank, forget
  * the time of the last interrupt and program MC_CTL2 with CMCI enabled
  * and a threshold of one again.
  */
 static void
 cmci_resume(int i)
 {
 	struct cmc_state *state;
 	uint64_t ctl2;
 
 	KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
 
 	/* Banks owned by other CPUs are left alone. */
 	if ((PCPU_GET(cmci_mask) & (1 << i)) == 0)
 		return;
 
 	state = &cmc_state[PCPU_GET(cpuid)][i];
 	state->last_intr = 0;
 
 	ctl2 = rdmsr(MSR_MC_CTL2(i));
 	ctl2 = (ctl2 & ~MC_CTL2_THRESHOLD) | MC_CTL2_CMCI_EN | 1;
 	wrmsr(MSR_MC_CTL2(i), ctl2);
 }
 
 /*
  * Apply an AMD ET configuration to the corresponding MSR.
  */
 static void
 amd_thresholding_start(struct amd_et_state *cc, int bank)
 {
 	uint64_t misc;
 
 	KASSERT(amd_elvt >= 0, ("ELVT offset is not set"));
 
 	misc = rdmsr(mca_msr_ops.misc(bank));
 
 	misc &= ~MC_MISC_AMD_INT_MASK;
 	misc |= MC_MISC_AMD_INT_LVT;
 
 	misc &= ~MC_MISC_AMD_LVT_MASK;
 	misc |= (uint64_t)amd_elvt << MC_MISC_AMD_LVT_SHIFT;
 
 	misc &= ~MC_MISC_AMD_CNT_MASK;
 	misc |= (uint64_t)(MC_MISC_AMD_CNT_MAX - cc->cur_threshold)
 	    << MC_MISC_AMD_CNT_SHIFT;
 
 	misc &= ~MC_MISC_AMD_OVERFLOW;
 	misc |= MC_MISC_AMD_CNTEN;
 
 	wrmsr(mca_msr_ops.misc(bank), misc);
 }
 
 /*
  * Try to claim the AMD error-thresholding counter of bank 'i' for the
  * current CPU.  The bank is skipped if the counter is not valid and
  * present, is locked, is already enabled, or if no extended LVT entry
  * could be enabled.  On success the counter is started with a
  * threshold of one and the bank's bit is set in this CPU's cmci_mask.
  */
 static void
 amd_thresholding_monitor(int i)
 {
 	struct amd_et_state *cc;
 	uint64_t misc;
 
 	/*
 	 * Kludge: On 10h, banks after 4 are not thresholding but also may have
 	 * bogus Valid bits.  Skip them.  This is definitely fixed in 15h, but
 	 * I have not investigated whether it is fixed in earlier models.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) < 0x15 && i >= 5)
 		return;
 
 	/* The counter must be valid and present. */
 	misc = rdmsr(mca_msr_ops.misc(i));
 	if ((misc & (MC_MISC_AMD_VAL | MC_MISC_AMD_CNTP)) !=
 	    (MC_MISC_AMD_VAL | MC_MISC_AMD_CNTP))
 		return;
 
 	/* The register should not be locked. */
 	if ((misc & MC_MISC_AMD_LOCK) != 0) {
 		if (bootverbose)
 			printf("%s: 0x%jx: Bank %d: locked\n", __func__,
 			    (uintmax_t)misc, i);
 		return;
 	}
 
 	/*
 	 * If counter is enabled then either the firmware or another CPU
 	 * has already claimed it.
 	 */
 	if ((misc & MC_MISC_AMD_CNTEN) != 0) {
 		if (bootverbose)
 			printf("%s: 0x%jx: Bank %d: already enabled\n",
 			    __func__, (uintmax_t)misc, i);
 		return;
 	}
 
 	/*
 	 * Configure an Extended Interrupt LVT register for reporting
 	 * counter overflows if that feature is supported and the first
 	 * extended register is available.
 	 */
 	amd_elvt = lapic_enable_mca_elvt();
 	if (amd_elvt < 0) {
 		printf("%s: Bank %d: lapic enable mca elvt failed: %d\n",
 		    __func__, i, amd_elvt);
 		return;
 	}
 
 	/* Re-use Intel CMC support infrastructure. */
 	if (bootverbose)
 		printf("%s: Starting AMD thresholding on bank %d\n", __func__,
 		    i);
 
 	/* Begin with the lowest threshold: interrupt on the first event. */
 	cc = &amd_et_state[PCPU_GET(cpuid)][i];
 	cc->cur_threshold = 1;
 	amd_thresholding_start(cc, i);
 
 	/* Mark this bank as monitored. */
 	PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i);
 }
 
 /*
  * Resume handling for bank 'i': if this CPU monitors the bank, reset
  * its thresholding state (threshold of one, no previous interrupt)
  * and reprogram the counter.
  */
 static void
 amd_thresholding_resume(int i)
 {
 	struct amd_et_state *state;
 
 	KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
 
 	/* Banks owned by other CPUs are left alone. */
 	if ((PCPU_GET(cmci_mask) & (1 << i)) == 0)
 		return;
 
 	state = &amd_et_state[PCPU_GET(cpuid)][i];
 	state->cur_threshold = 1;
 	state->last_intr = 0;
 	amd_thresholding_start(state, i);
 }
 #endif
 
 /*
  * Initializes per-CPU machine check registers and enables corrected
  * machine check interrupts.
  *
  * 'boot' is non-zero for the initial per-CPU setup and zero on resume.
  * At boot the BSP additionally performs the global setup
  * (mca_setup()) and each CPU tries to claim banks for CMCI or AMD
  * thresholding; on resume only the already claimed banks are
  * reprogrammed.  Nothing is done unless MCA is enabled and the CPU
  * reports CPUID_MCE; finally CR4.MCE is set.
  */
 static void
 _mca_init(int boot)
 {
 	uint64_t mcg_cap;
 	uint64_t ctl, mask;
 	int i, skip, family;
 
 	family = CPUID_TO_FAMILY(cpu_id);
 
 	/* MCE is required. */
 	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
 		return;
 
 	if (cpu_feature & CPUID_MCA) {
 		if (boot)
 			PCPU_SET(cmci_mask, 0);
 
 		mcg_cap = rdmsr(MSR_MCG_CAP);
 		if (mcg_cap & MCG_CAP_CTL_P)
 			/* Enable MCA features. */
 			wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 		if (IS_BSP() && boot)
 			mca_setup(mcg_cap);
 
 		/*
 		 * Disable logging of level one TLB parity (L1TP) errors by
 		 * the data cache as an alternative workaround for AMD Family
 		 * 10h Erratum 383.  Unlike the recommended workaround, there
 		 * is no performance penalty to this workaround.  However,
 		 * L1TP errors will go unreported.
 		 */
 		if (cpu_vendor_id == CPU_VENDOR_AMD && family == 0x10 &&
 		    !amd10h_L1TP) {
 			mask = rdmsr(MSR_MC0_CTL_MASK);
 			if ((mask & (1UL << 5)) == 0)
 				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
 		}
 		/* Scalable MCA uses a different MSR layout for the banks. */
 		if (amd_rascap & AMDRAS_SCALABLE_MCA) {
 			mca_msr_ops.ctl = mca_smca_ctl_reg;
 			mca_msr_ops.status = mca_smca_status_reg;
 			mca_msr_ops.addr = mca_smca_addr_reg;
 			mca_msr_ops.misc = mca_smca_misc_reg;
 		}
 
 		/*
 		 * The cmci_monitor() must not be executed
 		 * simultaneously by several CPUs.
 		 */
 		if (boot)
 			mtx_lock_spin(&mca_lock);
 
 		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
 			/* By default enable logging of all errors. */
 			ctl = 0xffffffffffffffffUL;
 			skip = 0;
 
 			if (cpu_vendor_id == CPU_VENDOR_INTEL) {
 				/*
 				 * For P6 models before Nehalem MC0_CTL is
 				 * always enabled and reserved.
 				 */
 				if (i == 0 && family == 0x6
 				    && CPUID_TO_MODEL(cpu_id) < 0x1a)
 					skip = 1;
 			} else if (cpu_vendor_id == CPU_VENDOR_AMD) {
 				/*
 				 * BKDG for Family 10h: unset GartTblWkEn.
 				 *
 				 * The mask must be 64 bits wide: with a
 				 * 32-bit unsigned long, ~(1UL << 10) would
 				 * zero-extend and also clear bits 63:32 of
 				 * the control value.
 				 */
 				if (i == MC_AMDNB_BANK && family >= 0xf &&
 				    family < 0x17)
 					ctl &= ~(1ULL << 10);
 			}
 
 			if (!skip)
 				wrmsr(mca_msr_ops.ctl(i), ctl);
 
 #ifdef DEV_APIC
 			if (cmci_supported(mcg_cap)) {
 				if (boot)
 					cmci_monitor(i);
 				else
 					cmci_resume(i);
 			} else if (amd_thresholding_supported()) {
 				if (boot)
 					amd_thresholding_monitor(i);
 				else
 					amd_thresholding_resume(i);
 			}
 #endif
 
 			/* Clear all errors. */
 			wrmsr(mca_msr_ops.status(i), 0);
 		}
 		if (boot)
 			mtx_unlock_spin(&mca_lock);
 
 #ifdef DEV_APIC
 		/* Enable the CMCI LVT only if this CPU claimed some bank. */
 		if (!amd_thresholding_supported() &&
 		    PCPU_GET(cmci_mask) != 0 && boot)
 			lapic_enable_cmc();
 #endif
 	}
 
 	load_cr4(rcr4() | CR4_MCE);
 }
 
 /*
  * Must be executed on each CPU during boot.
  * Runs the common initialization in "boot" mode (_mca_init(1)).
  */
 void
 mca_init(void)
 {
 
 	_mca_init(1);
 }
 
 /*
  * Must be executed on each CPU during resume.
  * Runs the common initialization in "resume" mode (_mca_init(0)).
  */
 void
 mca_resume(void)
 {
 
 	_mca_init(0);
 }
 
 /*
  * The machine check registers for the BSP cannot be initialized until
  * the local APIC is initialized.  This happens at SI_SUB_CPU,
  * SI_ORDER_SECOND.
  *
  * This hook is therefore registered at SI_SUB_CPU, SI_ORDER_ANY and
  * simply calls mca_init(); the argument is unused.
  */
 static void
 mca_init_bsp(void *arg __unused)
 {
 
 	mca_init();
 }
 SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL);
 
 /*
  * Machine check exception handler.
  *
  * CPUs without MCA only get their P5 machine check registers printed
  * before panicking.  Otherwise all banks are scanned; if the event is
  * restartable (no uncorrected record found and MCG_STATUS.RIPV set),
  * MCIP is cleared and the handler returns.  If not, the CPU panics,
  * unless it found no valid bank itself, in which case it spins.
  */
 void
 mca_intr(void)
 {
 	uint64_t mcg_status;
 	int found, restartable;
 
 	if ((cpu_feature & CPUID_MCA) == 0) {
 		/*
 		 * Just print the values of the old Pentium registers
 		 * and panic.
 		 */
 		printf("MC Type: 0x%jx  Address: 0x%jx\n",
 		    (uintmax_t)rdmsr(MSR_P5_MC_TYPE),
 		    (uintmax_t)rdmsr(MSR_P5_MC_ADDR));
 		panic("Machine check");
 	}
 
 	/* Scan the banks and check for any non-recoverable errors. */
 	found = mca_scan(MCE, &restartable);
 	mcg_status = rdmsr(MSR_MCG_STATUS);
 	if ((mcg_status & MCG_STATUS_RIPV) == 0)
 		restartable = 0;
 
 	if (restartable) {
 		/* Clear MCIP. */
 		wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
 		return;
 	}
 
 	/*
 	 * Only panic if the error was detected local to this CPU.
 	 * Some errors will assert a machine check on all CPUs, but
 	 * only certain CPUs will find a valid bank to log.
 	 */
 	while (found == 0)
 		cpu_spinwait();
 
 	panic("Unrecoverable machine check exception");
 }
 
 #ifdef DEV_APIC
 /*
  * Called for a CMCI (correctable machine check interrupt).
  * Scans the banks this CPU monitors and, if any valid record was
  * found, processes (logs and stores) the pending records.
  */
 void
 cmc_intr(void)
 {
 
 	/*
 	 * Serialize MCA bank scanning to prevent collisions from
 	 * sibling threads.
 	 *
 	 * If we found anything, log them to the console.
 	 *
 	 * NOTE(review): no lock is taken in this function itself; any
 	 * serialization has to come from the callees -- confirm that
 	 * the first sentence above is still accurate.
 	 */
 	if (mca_scan(CMCI, NULL) != 0)
 		mca_process_records(CMCI);
 }
 #endif
Index: head/sys/x86/x86/mp_x86.c
===================================================================
--- head/sys/x86/x86/mp_x86.c	(revision 356939)
+++ head/sys/x86/x86/mp_x86.c	(revision 356940)
@@ -1,1844 +1,1845 @@
 /*-
  * Copyright (c) 1996, by Steve Passe
  * Copyright (c) 2003, by Peter Wemm
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #ifdef __i386__
 #include "opt_apic.h"
 #endif
 #include "opt_cpu.h"
 #include "opt_kstack_pages.h"
 #include "opt_pmap.h"
 #include "opt_sched.h"
 #include "opt_smp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/cpuset.h>
 #include <sys/csan.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <x86/apicreg.h>
 #include <machine/clock.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <x86/ucode.h>
 
 /* malloc(9) type "cpus" ("CPU items") for allocations made in this file. */
 static MALLOC_DEFINE(M_CPUS, "cpus", "CPU items");
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 int	mp_naps;		/* # of Application Processors (APs) */
 int	boot_cpu_id = -1;	/* designated BSP; -1 until assigned */
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 int bootAP;
 
 /* Free these after use */
 void *bootstacks[MAXCPU];
 void *dpcpu;
 
 /* NOTE(review): presumably per-CPU saved contexts for stop/suspend. */
 struct pcb stoppcbs[MAXCPU];
 struct susppcb **susppcbs;
 
 #ifdef COUNT_IPIS
 /* Interrupt counts. */
 static u_long *ipi_preempt_counts[MAXCPU];
 static u_long *ipi_ast_counts[MAXCPU];
 u_long *ipi_invltlb_counts[MAXCPU];
 u_long *ipi_invlrng_counts[MAXCPU];
 u_long *ipi_invlpg_counts[MAXCPU];
 u_long *ipi_invlcache_counts[MAXCPU];
 u_long *ipi_rendezvous_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
 #endif
 
 /* Default cpu_ops implementation. */
 struct cpu_ops cpu_ops;
 
 /*
  * Local data and functions.
  */
 
 static volatile cpuset_t ipi_stop_nmi_pending;
 
 volatile cpuset_t resuming_cpus;
 volatile cpuset_t toresume_cpus;
 
 /* used to hold the APs until we are ready to release them */
 struct mtx ap_boot_mtx;
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
 
 /*
  * Store data from cpu_add() until later in the boot when we actually setup
  * the APs.
  *
  * The assertions below check at compile time that both MAXCPU and
  * xAPIC_MAX_APIC_ID fit within MAX_APIC_ID.
  */
 struct cpu_info *cpu_info;
 int *apic_cpuids;
 int cpu_apic_ids[MAXCPU];
 _Static_assert(MAXCPU <= MAX_APIC_ID,
     "MAXCPU cannot be larger than MAX_APIC_ID");
 _Static_assert(xAPIC_MAX_APIC_ID <= MAX_APIC_ID,
     "xAPIC_MAX_APIC_ID cannot be larger than MAX_APIC_ID");
 
 /* Forward declarations of functions defined later in this file. */
 static void	release_aps(void *dummy);
 static void	cpustop_handler_post(u_int cpu);
 
 /* Boot-time tunable (read-only sysctl): machdep.hyperthreading_allowed. */
 static int	hyperthreading_allowed = 1;
 SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN,
 	&hyperthreading_allowed, 0, "Use Intel HTT logical CPUs");
 
 /* Boot-time tunable: machdep.hyperthreading_intr_allowed. */
 static int	hyperthreading_intr_allowed = 0;
 SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_intr_allowed, CTLFLAG_RDTUN,
 	&hyperthreading_intr_allowed, 0,
 	"Allow interrupts on HTT logical CPUs");
 
 static struct topo_node topo_root;
 
 /*
  * Shift widths within the APIC ID for the package, node and core
  * levels, filled in by the topology probing routines below.
  */
 static int pkg_id_shift;
 static int node_id_shift;
 static int core_id_shift;
 static int disabled_cpus;
 
 /*
  * Per-level data cache description, indexed by cache level minus one
  * and filled in by the topology probing routines: 'id_shift' is the
  * mask width of the number of APIC IDs sharing the cache, 'present'
  * is non-zero once the level has been seen.
  */
 struct cache_info {
 	int	id_shift;
 	int	present;
 } static caches[MAX_CACHE_LEVELS];
 
 unsigned int boot_address;
 
 static bool stop_mwait = false;
 SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
     "Use MONITOR/MWAIT when stopping CPU, if available");
 
 /*
  * v mebibytes as an unsigned long long byte count.  The ULL suffix is
  * token-pasted onto v, so v must be a plain integer literal.
  */
 #define MiB(v)	(v ## ULL << 20)
 
 void
 mem_range_AP_init(void)
 {
 
 	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
 		mem_range_softc.mr_op->initAP(&mem_range_softc);
 }
 
 /*
  * Number of bits needed to distinguish x values: x is rounded up to
  * the next power of two (if it is not one already) and the base-2
  * logarithm of the result is returned.
  * Returns -1 if argument is zero.
  */
 static __inline int
 mask_width(u_int x)
 {
 	u_int rounded;
 
 	/*
 	 * Doubling a value that is not a power of two moves its highest
 	 * set bit to where the next power of two would have it.
 	 */
 	if (powerof2(x))
 		rounded = x;
 	else
 		rounded = x << 1;
 	return (fls(rounded) - 1);
 }
 
 /*
  * Add a cache level to the cache topology description.
  *
  * 'type', 'level' and 'share_count' describe one enumerated cache:
  * its type code, its level (1-based) and the number of APIC IDs that
  * share it.  Instruction caches (type 2) and entries with an
  * unexpected type or level are ignored.  For accepted entries the
  * sharing width is recorded in caches[level - 1], clamped to lie
  * between core_id_shift and pkg_id_shift.
  *
  * Returns 0 if 'type' is 0 (the caller stops enumerating), and 1
  * otherwise, whether or not the entry was recorded.
  */
 static int
 add_deterministic_cache(int type, int level, int share_count)
 {
 
 	if (type == 0)
 		return (0);
 	if (type > 3) {
 		printf("unexpected cache type %d\n", type);
 		return (1);
 	}
 	if (type == 2) /* ignore instruction cache */
 		return (1);
 	if (level == 0 || level > MAX_CACHE_LEVELS) {
 		/* Report the offending level, not the (valid) type. */
 		printf("unexpected cache level %d\n", level);
 		return (1);
 	}
 
 	if (caches[level - 1].present) {
 		printf("WARNING: multiple entries for L%u data cache\n", level);
 		printf("%u => %u\n", caches[level - 1].id_shift,
 		    mask_width(share_count));
 	}
 	/* A later entry for the same level overrides the earlier one. */
 	caches[level - 1].id_shift = mask_width(share_count);
 	caches[level - 1].present = 1;
 
 	if (caches[level - 1].id_shift > pkg_id_shift) {
 		printf("WARNING: L%u data cache covers more "
 		    "APIC IDs than a package (%u > %u)\n", level,
 		    caches[level - 1].id_shift, pkg_id_shift);
 		caches[level - 1].id_shift = pkg_id_shift;
 	}
 	if (caches[level - 1].id_shift < core_id_shift) {
 		printf("WARNING: L%u data cache covers fewer "
 		    "APIC IDs than a core (%u < %u)\n", level,
 		    caches[level - 1].id_shift, core_id_shift);
 		caches[level - 1].id_shift = core_id_shift;
 	}
 
 	return (1);
 }
 
 /*
  * Determine topology of processing units and caches for AMD CPUs.
  * See:
  *  - AMD CPUID Specification (Publication # 25481)
  *  - BKDG for AMD NPT Family 0Fh Processors (Publication # 32559)
  *  - BKDG For AMD Family 10h Processors (Publication # 31116)
  *  - BKDG For AMD Family 15h Models 00h-0Fh Processors (Publication # 42301)
  *  - BKDG For AMD Family 16h Models 00h-0Fh Processors (Publication # 48751)
  *  - PPR For AMD Family 17h Models 00h-0Fh Processors (Publication # 54945)
  *
  * Fills in pkg_id_shift, core_id_shift, node_id_shift and the caches[]
  * table.  Returns early, leaving them untouched, when the CPU does not
  * report multi-core capability (AMDID2_CMP).
  */
 static void
 topo_probe_amd(void)
 {
 	u_int p[4];
 	uint64_t v;
 	int level;
 	int nodes_per_socket;
 	int share_count;
 	int type;
 	int i;
 
 	/* No multi-core capability. */
 	if ((amd_feature2 & AMDID2_CMP) == 0)
 		return;
 
 	/* For families 10h and newer. */
 	pkg_id_shift = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
 	    AMDID_COREID_SIZE_SHIFT;
 
 	/* For 0Fh family. */
 	if (pkg_id_shift == 0)
 		pkg_id_shift =
 		    mask_width((cpu_procinfo2 & AMDID_CMP_CORES) + 1);
 
 	/*
 	 * Families prior to 16h define the following value as
 	 * cores per compute unit and we don't really care about the AMD
 	 * compute units at the moment.  Perhaps we should treat them as
 	 * cores and cores within the compute units as hardware threads,
 	 * but that's up for debate.
 	 * Later families define the value as threads per compute unit,
 	 * so we are following AMD's nomenclature here.
 	 */
 	if ((amd_feature2 & AMDID2_TOPOLOGY) != 0 &&
 	    CPUID_TO_FAMILY(cpu_id) >= 0x16) {
 		cpuid_count(0x8000001e, 0, p);
 		/* Bits 15:8 of the second register hold the count minus 1. */
 		share_count = ((p[1] >> 8) & 0xff) + 1;
 		core_id_shift = mask_width(share_count);
 
 		/*
 		 * For Zen (17h), gather Nodes per Processor.  Each node is a
 		 * Zeppelin die; TR and EPYC CPUs will have multiple dies per
 		 * package.  Communication latency between dies is higher than
 		 * within them.
 		 */
 		nodes_per_socket = ((p[2] >> 8) & 0x7) + 1;
 		node_id_shift = pkg_id_shift - mask_width(nodes_per_socket);
 	}
 
 	if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) {
 		/*
 		 * Enumerate the cache descriptions of leaf 0x8000001d
 		 * until add_deterministic_cache() reports the
 		 * terminating entry (type 0).
 		 */
 		for (i = 0; ; i++) {
 			cpuid_count(0x8000001d, i, p);
 			type = p[0] & 0x1f;
 			level = (p[0] >> 5) & 0x7;
 			share_count = 1 + ((p[0] >> 14) & 0xfff);
 
 			if (!add_deterministic_cache(type, level, share_count))
 				break;
 		}
 	} else {
 		/*
 		 * Without the topology extensions, fall back to the
 		 * cache information of leaves 0x80000005 and 0x80000006.
 		 * NOTE(review): caches[0..2] presumably correspond to the
 		 * L1, L2 and L3 caches -- confirm against the CPUID spec.
 		 */
 		if (cpu_exthigh >= 0x80000005) {
 			cpuid_count(0x80000005, 0, p);
 			if (((p[2] >> 24) & 0xff) != 0) {
 				caches[0].id_shift = 0;
 				caches[0].present = 1;
 			}
 		}
 		if (cpu_exthigh >= 0x80000006) {
 			cpuid_count(0x80000006, 0, p);
 			if (((p[2] >> 16) & 0xffff) != 0) {
 				caches[1].id_shift = 0;
 				caches[1].present = 1;
 			}
 			if (((p[3] >> 18) & 0x3fff) != 0) {
 				nodes_per_socket = 1;
 				if ((amd_feature2 & AMDID2_NODE_ID) != 0) {
 					/*
 					 * Handle multi-node processors that
 					 * have multiple chips, each with its
 					 * own L3 cache, on the same die.
 					 */
 					v = rdmsr(0xc001100c);
 					nodes_per_socket = 1 + ((v >> 3) & 0x7);
 				}
 				/* This cache is shared by one node. */
 				caches[2].id_shift =
 				    pkg_id_shift - mask_width(nodes_per_socket);
 				caches[2].present = 1;
 			}
 		}
 	}
 }
 
 /*
  * Derive the processing-unit topology on Intel CPUs from CPUID
  * Leaf 1 and, where available, Leaf 4.
  * See:
  *  - Intel 64 Architecture Processor Topology Enumeration
  *  - Intel 64 and IA-32 Architectures Software Developer's Manual,
  *    Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS
  *    FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS
  */
 static void
 topo_probe_intel_0x4(void)
 {
 	u_int regs[4];
 	int ncores;
 	int nlogical;
 
 	/*
 	 * No HTT, or a logical count of zero or one, all mean a single
 	 * logical processor per package: leave the shifts untouched.
 	 */
 	if ((cpu_feature & CPUID_HTT) == 0)
 		return;
 	nlogical = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
 	if (nlogical <= 1)
 		return;
 
 	/* Leaf 4 supplies the core count; without it assume one core. */
 	ncores = 1;
 	if (cpu_high >= 0x4) {
 		cpuid_count(0x04, 0, regs);
 		ncores = ((regs[0] >> 26) & 0x3f) + 1;
 	}
 
 	core_id_shift = mask_width(nlogical / ncores);
 	KASSERT(core_id_shift >= 0,
 	    ("intel topo: max_cores > max_logical\n"));
 	pkg_id_shift = core_id_shift + mask_width(ncores);
 }
 
 /*
  * Derive the processing-unit topology on Intel CPUs from CPUID
  * Leaf 11, falling back to the Leaf 1/4 method when Leaf 11 turns
  * out not to be implemented.
  * See:
  *  - Intel 64 Architecture Processor Topology Enumeration
  *  - Intel 64 and IA-32 Architectures Software Developer's Manual,
  *    Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS
  *    FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS
  */
 static void
 topo_probe_intel_0xb(void)
 {
 	u_int regs[4];
 	int level_type;
 	int shift;
 	int subleaf;
 
 	/* A zero EBX in sub-leaf 0 means leaf 11 doesn't really exist. */
 	cpuid_count(0x0b, 0, regs);
 	if (regs[1] == 0) {
 		topo_probe_intel_0x4();
 		return;
 	}
 
 	/*
 	 * Enumerate the sub-leaves until a level type of zero is seen.
 	 * Only the SMT and core level types are understood for now.
 	 */
 	for (subleaf = 0; ; subleaf++) {
 		cpuid_count(0x0b, subleaf, regs);
 
 		level_type = (regs[2] >> 8) & 0xff;
 		if (level_type == 0)
 			break;
 		shift = regs[0] & 0x1f;
 
 		/* TODO: check for duplicate (re-)assignment */
 		switch (level_type) {
 		case CPUID_TYPE_SMT:
 			core_id_shift = shift;
 			break;
 		case CPUID_TYPE_CORE:
 			pkg_id_shift = shift;
 			break;
 		default:
 			printf("unknown CPU level type %d\n", level_type);
 			break;
 		}
 	}
 
 	/* Clamp an inconsistent result so that core <= package holds. */
 	if (pkg_id_shift < core_id_shift) {
 		printf("WARNING: core covers more APIC IDs than a package\n");
 		core_id_shift = pkg_id_shift;
 	}
 }
 
 /*
  * Discover the cache topology on Intel CPUs.
  * See:
  *  - Intel 64 Architecture Processor Topology Enumeration
  *  - Intel 64 and IA-32 Architectures Software Developer's Manual
  *    Volume 2A: Instruction Set Reference, A-M,
  *    CPUID instruction
  */
 static void
 topo_probe_intel_caches(void)
 {
 	u_int regs[4];
 	int cache_level;
 	int cache_type;
 	int sharers;
 	int subleaf;
 
 	if (cpu_high >= 0x4) {
 		/*
 		 * Feed every leaf 4 sub-leaf to add_deterministic_cache()
 		 * until it reports that enumeration is finished.
 		 */
 		subleaf = 0;
 		do {
 			cpuid_count(0x4, subleaf, regs);
 			subleaf++;
 			cache_type = regs[0] & 0x1f;
 			cache_level = (regs[0] >> 5) & 0x7;
 			sharers = 1 + ((regs[0] >> 14) & 0xfff);
 		} while (add_deterministic_cache(cache_type, cache_level,
 		    sharers));
 		return;
 	}
 
 	/*
 	 * CPUID leaf 2 could describe the available cache levels and
 	 * sizes, but decoding it needs a huge table of hardcoded values.
 	 * For now simply assume L1 and L2 caches that are potentially
 	 * shared only by HTT processing units, if HTT is present.
 	 */
 	caches[0].id_shift = pkg_id_shift;
 	caches[0].present = 1;
 	caches[1].id_shift = pkg_id_shift;
 	caches[1].present = 1;
 }
 
 /*
  * Discover the topology of processing units and caches on Intel CPUs.
  * See:
  *  - Intel 64 Architecture Processor Topology Enumeration
  */
 static void
 topo_probe_intel(void)
 {
 
 	/*
 	 * The 0x1 <= cpu_high < 4 case is expected to be compatible
 	 * with the topo_probe_intel_0x4() logic when
 	 * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1); otherwise it
 	 * should end up in the fallback.
 	 */
 	if (cpu_high >= 0x1) {
 		if (cpu_high >= 0xb)
 			topo_probe_intel_0xb();
 		else
 			topo_probe_intel_0x4();
 	}
 
 	/* Cache discovery runs regardless of how the PUs were probed. */
 	topo_probe_intel_caches();
 }
 
 /*
  * Topology information is queried only on BSP, on which this
  * code runs and for which it can query CPUID information.
  * Then topology is extrapolated on all packages using an
  * assumption that APIC ID to hardware component ID mapping is
  * homogeneous.
  * That doesn't necessarily imply that the topology is uniform.
  *
  * The function runs its body only once; later calls return immediately.
  * It builds an ordered list of layers (package, optional node group,
  * present caches from the highest level down, optional core, PU), each
  * with an APIC ID shift, then populates the tree under topo_root by
  * shifting every present APIC ID by each layer's shift.
  */
 void
 topo_probe(void)
 {
 	static int cpu_topo_probed = 0;
 	struct x86_topo_layer {
 		int type;
 		int subtype;
 		int id_shift;
 	} topo_layers[MAX_CACHE_LEVELS + 4];
 	struct topo_node *parent;
 	struct topo_node *node;
 	int layer;
 	int nlayers;
 	int node_id;
 	int i;
 
 	if (cpu_topo_probed)
 		return;
 
 	CPU_ZERO(&logical_cpus_mask);
 
 	/* Vendor-specific discovery of the ID shifts and cache levels. */
 	if (mp_ncpus <= 1)
 		; /* nothing */
-	else if (cpu_vendor_id == CPU_VENDOR_AMD)
+	else if (cpu_vendor_id == CPU_VENDOR_AMD ||
+	    cpu_vendor_id == CPU_VENDOR_HYGON)
 		topo_probe_amd();
 	else if (cpu_vendor_id == CPU_VENDOR_INTEL)
 		topo_probe_intel();
 
 	KASSERT(pkg_id_shift >= core_id_shift,
 	    ("bug in APIC topology discovery"));
 
 	nlayers = 0;
 	bzero(topo_layers, sizeof(topo_layers));
 
 	/* The package layer is always present and always first. */
 	topo_layers[nlayers].type = TOPO_TYPE_PKG;
 	topo_layers[nlayers].id_shift = pkg_id_shift;
 	if (bootverbose)
 		printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift);
 	nlayers++;
 
 	/* A node (group) layer is added only if it subdivides a package. */
 	if (pkg_id_shift > node_id_shift && node_id_shift != 0) {
 		topo_layers[nlayers].type = TOPO_TYPE_GROUP;
 		topo_layers[nlayers].id_shift = node_id_shift;
 		if (bootverbose)
 			printf("Node ID shift: %u\n",
 			    topo_layers[nlayers].id_shift);
 		nlayers++;
 	}
 
 	/*
 	 * Consider all caches to be within a package/chip
 	 * and "in front" of all sub-components like
 	 * cores and hardware threads.
 	 */
 	for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) {
 		if (caches[i].present) {
 			if (node_id_shift != 0)
 				KASSERT(caches[i].id_shift <= node_id_shift,
 					("bug in APIC topology discovery"));
 			KASSERT(caches[i].id_shift <= pkg_id_shift,
 				("bug in APIC topology discovery"));
 			KASSERT(caches[i].id_shift >= core_id_shift,
 				("bug in APIC topology discovery"));
 
 			topo_layers[nlayers].type = TOPO_TYPE_CACHE;
 			/* The subtype carries the 1-based cache level. */
 			topo_layers[nlayers].subtype = i + 1;
 			topo_layers[nlayers].id_shift = caches[i].id_shift;
 			if (bootverbose)
 				printf("L%u cache ID shift: %u\n",
 				    topo_layers[nlayers].subtype,
 				    topo_layers[nlayers].id_shift);
 			nlayers++;
 		}
 	}
 
 	/* A core layer is added only if a package spans more than a core. */
 	if (pkg_id_shift > core_id_shift) {
 		topo_layers[nlayers].type = TOPO_TYPE_CORE;
 		topo_layers[nlayers].id_shift = core_id_shift;
 		if (bootverbose)
 			printf("Core ID shift: %u\n",
 			    topo_layers[nlayers].id_shift);
 		nlayers++;
 	}
 
 	/* The processing-unit layer is the leaf and uses the full APIC ID. */
 	topo_layers[nlayers].type = TOPO_TYPE_PU;
 	topo_layers[nlayers].id_shift = 0;
 	nlayers++;
 
 	/* Insert one path from the root down to a PU per present APIC ID. */
 	topo_init_root(&topo_root);
 	for (i = 0; i <= max_apic_id; ++i) {
 		if (!cpu_info[i].cpu_present)
 			continue;
 
 		parent = &topo_root;
 		for (layer = 0; layer < nlayers; ++layer) {
 			node_id = i >> topo_layers[layer].id_shift;
 			parent = topo_add_node_by_hwid(parent, node_id,
 			    topo_layers[layer].type,
 			    topo_layers[layer].subtype);
 		}
 	}
 
 	/*
 	 * Walk the path that leads to the boot CPU and call
 	 * topo_promote_child() on each node along it.
 	 */
 	parent = &topo_root;
 	for (layer = 0; layer < nlayers; ++layer) {
 		node_id = boot_cpu_id >> topo_layers[layer].id_shift;
 		node = topo_find_node_by_hwid(parent, node_id,
 		    topo_layers[layer].type,
 		    topo_layers[layer].subtype);
 		topo_promote_child(node);
 		parent = node;
 	}
 
 	cpu_topo_probed = 1;
 }
 
 /*
  * Hand out logical CPU IDs to the local APICs found in the topology
  * tree and recompute mp_ncpus, mp_ncores and smp_threads_per_core.
  */
 void
 assign_cpu_ids(void)
 {
 	struct topo_node *pu;
 	u_int thread_mask;
 	int ht_count;
 
 	/* APIC ID bits below core_id_shift (the SMT thread bits). */
 	thread_mask = (1u << core_id_shift) - 1;
 
 	/*
 	 * Map CPU IDs to local APIC IDs, disabling any CPUs beyond
 	 * MAXCPU.  The BSP always ends up as CPU 0.
 	 */
 	mp_ncpus = 0;
 	ht_count = 0;
 	TOPO_FOREACH(pu, &topo_root) {
 		if (pu->type != TOPO_TYPE_PU)
 			continue;
 
 		/* Thread bits that differ from the BSP's mark a hyperthread. */
 		if ((pu->hwid & thread_mask) != (boot_cpu_id & thread_mask))
 			cpu_info[pu->hwid].cpu_hyperthread = 1;
 
 		/* Honour a "lapic" resource disable for everything but the BSP. */
 		if (resource_disabled("lapic", pu->hwid)) {
 			if (pu->hwid == boot_cpu_id)
 				printf("Cannot disable BSP, APIC ID = %d\n",
 				    pu->hwid);
 			else
 				cpu_info[pu->hwid].cpu_disabled = 1;
 		}
 
 		if ((!hyperthreading_allowed &&
 		    cpu_info[pu->hwid].cpu_hyperthread) ||
 		    mp_ncpus >= MAXCPU)
 			cpu_info[pu->hwid].cpu_disabled = 1;
 
 		if (cpu_info[pu->hwid].cpu_disabled) {
 			disabled_cpus++;
 			continue;
 		}
 
 		if (cpu_info[pu->hwid].cpu_hyperthread)
 			ht_count++;
 
 		/* Record the mapping in both directions and in the tree. */
 		cpu_apic_ids[mp_ncpus] = pu->hwid;
 		apic_cpuids[pu->hwid] = mp_ncpus;
 		topo_set_pu_id(pu, mp_ncpus);
 		mp_ncpus++;
 	}
 
 	KASSERT(mp_maxid >= mp_ncpus - 1,
 	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
 	    mp_ncpus));
 
 	mp_ncores = mp_ncpus - ht_count;
 	smp_threads_per_core = mp_ncpus / mp_ncores;
 }
 
 /*
  * Print various information about the SMP system hardware and setup.
  */
 void
 cpu_mp_announce(void)
 {
 	struct topo_node *node;
 	const char *hyperthread;
 	struct topo_analysis topology;
 
 	printf("FreeBSD/SMP: ");
 	if (topo_analyze(&topo_root, 1, &topology)) {
 		printf("%d package(s)", topology.entities[TOPO_LEVEL_PKG]);
 		if (topology.entities[TOPO_LEVEL_GROUP] > 1)
 			printf(" x %d groups",
 			    topology.entities[TOPO_LEVEL_GROUP]);
 		if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1)
 			printf(" x %d cache groups",
 			    topology.entities[TOPO_LEVEL_CACHEGROUP]);
 		if (topology.entities[TOPO_LEVEL_CORE] > 0)
 			printf(" x %d core(s)",
 			    topology.entities[TOPO_LEVEL_CORE]);
 		if (topology.entities[TOPO_LEVEL_THREAD] > 1)
 			printf(" x %d hardware threads",
 			    topology.entities[TOPO_LEVEL_THREAD]);
 	} else {
 		printf("Non-uniform topology");
 	}
 	printf("\n");
 
 	if (disabled_cpus) {
 		printf("FreeBSD/SMP Online: ");
 		if (topo_analyze(&topo_root, 0, &topology)) {
 			printf("%d package(s)",
 			    topology.entities[TOPO_LEVEL_PKG]);
 			if (topology.entities[TOPO_LEVEL_GROUP] > 1)
 				printf(" x %d groups",
 				    topology.entities[TOPO_LEVEL_GROUP]);
 			if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1)
 				printf(" x %d cache groups",
 				    topology.entities[TOPO_LEVEL_CACHEGROUP]);
 			if (topology.entities[TOPO_LEVEL_CORE] > 0)
 				printf(" x %d core(s)",
 				    topology.entities[TOPO_LEVEL_CORE]);
 			if (topology.entities[TOPO_LEVEL_THREAD] > 1)
 				printf(" x %d hardware threads",
 				    topology.entities[TOPO_LEVEL_THREAD]);
 		} else {
 			printf("Non-uniform topology");
 		}
 		printf("\n");
 	}
 
 	if (!bootverbose)
 		return;
 
 	TOPO_FOREACH(node, &topo_root) {
 		switch (node->type) {
 		case TOPO_TYPE_PKG:
 			printf("Package HW ID = %u\n", node->hwid);
 			break;
 		case TOPO_TYPE_CORE:
 			printf("\tCore HW ID = %u\n", node->hwid);
 			break;
 		case TOPO_TYPE_PU:
 			if (cpu_info[node->hwid].cpu_hyperthread)
 				hyperthread = "/HT";
 			else
 				hyperthread = "";
 
 			if (node->subtype == 0)
 				printf("\t\tCPU (AP%s): APIC ID: %u"
 				    "(disabled)\n", hyperthread, node->hwid);
 			else if (node->id == 0)
 				printf("\t\tCPU0 (BSP): APIC ID: %u\n",
 				    node->hwid);
 			else
 				printf("\t\tCPU%u (AP%s): APIC ID: %u\n",
 				    node->id, hyperthread, node->hwid);
 			break;
 		default:
 			/* ignored */
 			break;
 		}
 	}
 }
 
 /*
  * Add a scheduling group, a group of logical processors sharing
  * a particular cache (and, thus having an affinity), to the scheduling
  * topology.
  * This function recursively works on lower level caches.
  *
  * root:    hardware topology node to describe; must be the system root,
  *          a cache node or a group node (asserted below).
  * cg_root: scheduling group to fill in from root; its child array is
  *          allocated here with smp_topo_alloc().
  */
 static void
 x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root)
 {
 	struct topo_node *node;
 	int nchildren;
 	int ncores;
 	int i;
 
 	KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE ||
 	    root->type == TOPO_TYPE_GROUP,
 	    ("x86topo_add_sched_group: bad type: %u", root->type));
 	CPU_COPY(&root->cpuset, &cg_root->cg_mask);
 	cg_root->cg_count = root->cpu_count;
 	/* The system root shares nothing; otherwise use the node's subtype. */
 	if (root->type == TOPO_TYPE_SYSTEM)
 		cg_root->cg_level = CG_SHARE_NONE;
 	else
 		cg_root->cg_level = root->subtype;
 
 	/*
 	 * Check how many core nodes we have under the given root node.
 	 * If we have multiple logical processors, but not multiple
 	 * cores, then those processors must be hardware threads.
 	 */
 	ncores = 0;
 	node = root;
 	while (node != NULL) {
 		if (node->type != TOPO_TYPE_CORE) {
 			node = topo_next_node(root, node);
 			continue;
 		}
 
 		ncores++;
 		/* Do not descend into a core that has been counted. */
 		node = topo_next_nonchild_node(root, node);
 	}
 
 	if (cg_root->cg_level != CG_SHARE_NONE &&
 	    root->cpu_count > 1 && ncores < 2)
 		cg_root->cg_flags = CG_FLAG_SMT;
 
 	/*
 	 * Find out how many cache nodes we have under the given root node.
 	 * We ignore cache nodes that cover all the same processors as the
 	 * root node.  Also, we do not descend below found cache nodes.
 	 * That is, we count top-level "non-redundant" caches under the root
 	 * node.
 	 */
 	nchildren = 0;
 	node = root;
 	while (node != NULL) {
 		if ((node->type != TOPO_TYPE_GROUP &&
 		    node->type != TOPO_TYPE_CACHE) ||
 		    (root->type != TOPO_TYPE_SYSTEM &&
 		    CPU_CMP(&node->cpuset, &root->cpuset) == 0)) {
 			node = topo_next_node(root, node);
 			continue;
 		}
 		nchildren++;
 		node = topo_next_nonchild_node(root, node);
 	}
 
 	cg_root->cg_child = smp_topo_alloc(nchildren);
 	cg_root->cg_children = nchildren;
 
 	/*
 	 * Now find again the same cache nodes as above and recursively
 	 * build scheduling topologies for them.
 	 * The selection predicate must stay identical to the counting loop
 	 * above so that exactly nchildren slots of cg_child are filled.
 	 */
 	node = root;
 	i = 0;
 	while (node != NULL) {
 		if ((node->type != TOPO_TYPE_GROUP &&
 		    node->type != TOPO_TYPE_CACHE) ||
 		    (root->type != TOPO_TYPE_SYSTEM &&
 		    CPU_CMP(&node->cpuset, &root->cpuset) == 0)) {
 			node = topo_next_node(root, node);
 			continue;
 		}
 		cg_root->cg_child[i].cg_parent = cg_root;
 		x86topo_add_sched_group(node, &cg_root->cg_child[i]);
 		i++;
 		node = topo_next_nonchild_node(root, node);
 	}
 }
 
 /*
  * Construct the MI scheduling topology from the hardware topology
  * discovered earlier.  With a single CPU the trivial topology from
  * smp_topo_none() is returned instead.
  */
 struct cpu_group *
 cpu_topo(void)
 {
 	struct cpu_group *cg_root;
 
 	if (mp_ncpus > 1) {
 		cg_root = smp_topo_alloc(1);
 		x86topo_add_sched_group(&topo_root, cg_root);
 	} else
 		cg_root = smp_topo_none();
 
 	return (cg_root);
 }
 
 /*
  * SYSINIT hook: allocate the zero-filled cpu_info and apic_cpuids
  * arrays, which are indexed by APIC ID and therefore need
  * max_apic_id + 1 entries each.
  */
 static void
 cpu_alloc(void *dummy __unused)
 {
 
 	cpu_info = malloc((max_apic_id + 1) * sizeof(cpu_info[0]), M_CPUS,
 	    M_WAITOK | M_ZERO);
 	apic_cpuids = malloc((max_apic_id + 1) * sizeof(apic_cpuids[0]),
 	    M_CPUS, M_WAITOK | M_ZERO);
 }
 SYSINIT(cpu_alloc, SI_SUB_CPU, SI_ORDER_FIRST, cpu_alloc, NULL);
 
 /*
  * Add a logical CPU to the topology.
  *
  * apic_id:  local APIC ID of the CPU being registered; it indexes
  *           cpu_info[] and must not exceed max_apic_id.
  * boot_cpu: non-zero when this CPU is the boot processor (BSP), in
  *           which case boot_cpu_id is set to apic_id.
  *
  * Panics when apic_id is out of range.  Adding the same CPU twice or
  * registering a second BSP trips a KASSERT.
  */
 void
 cpu_add(u_int apic_id, char boot_cpu)
 {
 
 	/* panic() does not return, so no explicit return is needed. */
 	if (apic_id > max_apic_id)
 		panic("SMP: APIC ID %u too high", apic_id);
 
 	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %u added twice",
 	    apic_id));
 	cpu_info[apic_id].cpu_present = 1;
 	if (boot_cpu) {
 		/* -1 means that no BSP has been registered yet. */
 		KASSERT(boot_cpu_id == -1,
 		    ("CPU %u claims to be BSP, but CPU %u already is", apic_id,
 		    boot_cpu_id));
 		boot_cpu_id = apic_id;
 		cpu_info[apic_id].cpu_bsp = 1;
 	}
 	if (bootverbose)
 		printf("SMP: Added CPU %u (%s)\n", apic_id, boot_cpu ? "BSP" :
 		    "AP");
 }
 
 /*
  * Make sure mp_ncpus is sane before the rest of SMP setup runs.
  */
 void
 cpu_mp_setmaxid(void)
 {
 
 	/*
 	 * Calls to cpu_add() should already have set up mp_ncpus and
 	 * mp_maxid; a non-zero count is left alone.
 	 */
 	if (mp_ncpus != 0)
 		return;
 
 	/* No cpu_add() calls were made: assume this is a UP system. */
 	mp_ncpus = 1;
 }
 
 /*
  * Report whether more than one CPU is available.  Returns 1 when
  * mp_ncpus is greater than one and 0 otherwise.
  */
 int
 cpu_mp_probe(void)
 {
 
 	/*
 	 * The BSP is always entered in the CPU map so that the mbuf
 	 * init code works correctly.
 	 */
 	CPU_SETOF(0, &all_cpus);
 
 	return (mp_ncpus > 1 ? 1 : 0);
 }
 
 /*
  * Allocate memory for the AP trampoline.
  *
  * physmap:     array of (start, end) physical address pairs; the region
  *              chosen for the trampoline is trimmed in place.
  * physmap_idx: in/out index of the start entry of the last pair in
  *              physmap; reduced by two if the chosen region becomes
  *              empty and is removed.
  *
  * On return boot_address holds the page-aligned trampoline address.  If
  * no region below 1MB is large enough, boot_address falls back to the
  * top of base memory and physmap is left untouched.
  */
 void
 alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx)
 {
 	unsigned int i;
 	bool allocated;
 
 	allocated = false;
 	/*
 	 * Scan the pairs from the last one down to index 0.  i is unsigned,
 	 * so stepping below zero wraps to a value above *physmap_idx and
 	 * terminates the loop.
 	 */
 	for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
 		/*
 		 * Find a memory region big enough and below the 1MB boundary
 		 * for the trampoline code.
 		 * NB: needs to be page aligned.
 		 */
 		if (physmap[i] >= MiB(1) ||
 		    (trunc_page(physmap[i + 1]) - round_page(physmap[i])) <
 		    round_page(bootMP_size))
 			continue;
 
 		allocated = true;
 		/*
 		 * Try to steal from the end of the region to mimic previous
 		 * behaviour, else fallback to steal from the start.
 		 */
 		if (physmap[i + 1] < MiB(1)) {
 			boot_address = trunc_page(physmap[i + 1]);
 			if ((physmap[i + 1] - boot_address) < bootMP_size)
 				boot_address -= round_page(bootMP_size);
 			physmap[i + 1] = boot_address;
 		} else {
 			boot_address = round_page(physmap[i]);
 			physmap[i] = boot_address + round_page(bootMP_size);
 		}
 		/* Drop the region if it is now empty, unless it is the only one. */
 		if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
 			memmove(&physmap[i], &physmap[i + 2],
 			    sizeof(*physmap) * (*physmap_idx - i + 2));
 			*physmap_idx -= 2;
 		}
 		break;
 	}
 
 	if (!allocated) {
 		boot_address = basemem * 1024 - bootMP_size;
 		/* Terminate the line so the next console message starts cleanly. */
 		if (bootverbose)
 			printf(
 "Cannot find enough space for the boot trampoline, placing it at %#x\n",
 			    boot_address);
 	}
 }
 
 /*
  * AP CPU's call this to initialize themselves.
  *
  * Runs on each application processor.  It finishes per-CPU hardware
  * setup, registers the CPU under ap_boot_mtx, waits until smp_started is
  * set and finally enters the scheduler through sched_throw().  The
  * function is not expected to return; it panics if sched_throw() does.
  */
 void
 init_secondary_tail(void)
 {
 	u_int cpuid;
 
 	pmap_activate_boot(vmspace_pmap(proc0.p_vmspace));
 
 	/*
 	 * On real hardware, switch to x2apic mode if possible.  Do it
 	 * after aps_ready was signalled, to avoid manipulating the
 	 * mode while BSP might still want to send some IPI to us
 	 * (second startup IPI is ignored on modern hardware etc).
 	 */
 	lapic_xapic_mode();
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up SSE/NX */
 	initializecpu();
 
 	/* set up FPU state on the AP */
 #ifdef __amd64__
 	fpuinit();
 #else
 	npxinit(false);
 #endif
 
 	/* Optional per-CPU init hook supplied through cpu_ops. */
 	if (cpu_ops.cpu_init)
 		cpu_ops.cpu_init();
 
 	/* A quick check from sanity claus */
 	cpuid = PCPU_GET(cpuid);
 	if (PCPU_GET(apic_id) != lapic_id()) {
 		printf("SMP: cpuid = %d\n", cpuid);
 		printf("SMP: actual apic_id = %d\n", lapic_id());
 		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Initialize curthread. */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	PCPU_SET(curthread, PCPU_GET(idlethread));
 
 	/*
 	 * Everything up to the matching mtx_unlock_spin() below runs with
 	 * ap_boot_mtx held, including the smp_cpus update and the launch
 	 * message.
 	 */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	mca_init();
 
 	/* Init local apic for irq's */
 	lapic_setup(1);
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	smp_cpus++;
 
 	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
 	/*
 	 * Non-verbose boots print a single line: a prefix with the first AP
 	 * (smp_cpus == 2), each CPU id, and a newline after the last one.
 	 */
 	if (bootverbose)
 		printf("SMP: AP CPU #%d Launched!\n", cpuid);
 	else
 		printf("%s%d%s", smp_cpus == 2 ? "Launching APs: " : "",
 		    cpuid, smp_cpus == mp_ncpus ? "\n" : " ");
 
 	/* Determine if we are a logical CPU. */
 	if (cpu_info[PCPU_GET(apic_id)].cpu_hyperthread)
 		CPU_SET(cpuid, &logical_cpus_mask);
 
 	if (bootverbose)
 		lapic_dump("AP");
 
 	/* The last AP to arrive releases every CPU waiting on smp_started. */
 	if (smp_cpus == mp_ncpus) {
 		/* enable IPI's, tlb shootdown, freezes etc */
 		atomic_store_rel_int(&smp_started, 1);
 	}
 
 #ifdef __amd64__
 	/*
 	 * Enable global pages TLB extension
 	 * This also implicitly flushes the TLB
 	 */
 	load_cr4(rcr4() | CR4_PGE);
 	if (pmap_pcid_enabled)
 		load_cr4(rcr4() | CR4_PCIDE);
 	load_ds(_udatasel);
 	load_es(_udatasel);
 	load_fs(_ufssel);
 #endif
 
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* Wait until all the AP's are up. */
 	while (atomic_load_acq_int(&smp_started) == 0)
 		ia32_pause();
 
 #ifndef EARLY_AP_STARTUP
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 #endif
 
 	kcsan_cpu_init(cpuid);
 
 	sched_throw(NULL);
 
 	panic("scheduler returned us to %s", __func__);
 	/* NOTREACHED */
 }
 
 /*
  * SYSINIT hook: for every AP, spin until its per-CPU curthread pointer
  * has been set, then free that AP's bootstrap stack.
  */
 static void
 smp_after_idle_runnable(void *arg __unused)
 {
 	struct pcpu *ap_pc;
 	int id;
 
 	/* CPU 0 is skipped; the loop starts at the first AP. */
 	id = 1;
 	while (id < mp_ncpus) {
 		ap_pc = pcpu_find(id);
 		while (atomic_load_ptr(&ap_pc->pc_curthread) ==
 		    (uintptr_t)NULL)
 			cpu_spinwait();
 		kmem_free((vm_offset_t)bootstacks[id], kstack_pages *
 		    PAGE_SIZE);
 		id++;
 	}
 }
 SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
     smp_after_idle_runnable, NULL);
 
 /*
  * Tell the I/O APIC code which CPUs may receive interrupts.  A CPU
  * that should not service IRQs is simply not reported here.  The BSP
  * is left out as well, because the I/O APIC code registers it
  * internally so that UP kernels and UP machines keep working.
  */
 void
 set_interrupt_apic_ids(void)
 {
 	u_int apic_id, cpu;
 
 	for (cpu = 0; cpu < MAXCPU; cpu++) {
 		apic_id = cpu_apic_ids[cpu];
 
 		/* Skip unused slots, the BSP and disabled CPUs. */
 		if (apic_id == -1 || cpu_info[apic_id].cpu_bsp ||
 		    cpu_info[apic_id].cpu_disabled)
 			continue;
 
 		/* Don't let hyperthreads service interrupts. */
 		if (!hyperthreading_intr_allowed &&
 		    cpu_info[apic_id].cpu_hyperthread)
 			continue;
 
 		intr_add_cpu(cpu);
 	}
 }
 
 
 #ifdef COUNT_XINVLTLB_HITS
 u_int xhits_gbl[MAXCPU];
 u_int xhits_pg[MAXCPU];
 u_int xhits_rng[MAXCPU];
 static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
     sizeof(xhits_gbl), "IU", "");
 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
     sizeof(xhits_pg), "IU", "");
 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
     sizeof(xhits_rng), "IU", "");
 
 u_int ipi_global;
 u_int ipi_page;
 u_int ipi_range;
 u_int ipi_range_size;
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
     0, "");
 #endif /* COUNT_XINVLTLB_HITS */
 
 /*
  * Init and startup IPI.
  *
  * Sends the INIT / INIT-deassert / STARTUP / STARTUP sequence to one CPU.
  *
  * apic_id: local APIC ID of the target CPU.
  * vector:  value OR-ed into both STARTUP IPI commands.
  *
  * Panics if either STARTUP IPI is not reported as delivered.
  */
 void
 ipi_startup(int apic_id, int vector)
 {
 
 	/*
 	 * This attempts to follow the algorithm described in the
 	 * Intel Multiprocessor Specification v1.4 in section B.4.
 	 * For each IPI, we allow the local APIC ~20us to deliver the
 	 * IPI.  If that times out, we panic.
 	 */
 
 	/*
 	 * first we do an INIT IPI: this INIT IPI might be run, resetting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
 	/* Unlike the STARTUP IPIs below, the result of this wait is ignored. */
 	lapic_ipi_wait(100);
 
 	/* Explicitly deassert the INIT IPI. */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
 	    apic_id);
 
 	DELAY(10000);		/* wait ~10ms */
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
 	if (!lapic_ipi_wait(100))
 		panic("Failed to deliver first STARTUP IPI to APIC %d",
 		    apic_id);
 	DELAY(200);		/* wait ~200us */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
 	if (!lapic_ipi_wait(100))
 		panic("Failed to deliver second STARTUP IPI to APIC %d",
 		    apic_id);
 
 	DELAY(200);		/* wait ~200us */
 }
 
 /*
  * Send an IPI to specified CPU handling the bitmap logic.
  *
  * cpu: logical CPU ID of the target; must be below MAXCPU and have an
  *      APIC ID assigned (asserted).
  * ipi: IPI to send.  A bitmapped IPI is recorded as a bit in the target's
  *      pc_ipi_bitmap and delivered through IPI_BITMAP_VECTOR; any other
  *      IPI is sent as its own vector.
  */
 void
 ipi_send_cpu(int cpu, u_int ipi)
 {
 	u_int bitmap, old, new;
 	u_int *cpu_bitmap;
 
 	KASSERT((u_int)cpu < MAXCPU && cpu_apic_ids[cpu] != -1,
 	    ("IPI to non-existent CPU %d", cpu));
 
 	if (IPI_IS_BITMAPED(ipi)) {
 		bitmap = 1 << ipi;
 		ipi = IPI_BITMAP_VECTOR;
 		cpu_bitmap = &cpuid_to_pcpu[cpu]->pc_ipi_bitmap;
 		old = *cpu_bitmap;
 		/*
 		 * Set our bit unless it is already set.  On failure
 		 * atomic_fcmpset_int() refreshes "old", so the loop retries
 		 * against the current value.
 		 */
 		for (;;) {
 			if ((old & bitmap) == bitmap)
 				break;
 			new = old | bitmap;
 			if (atomic_fcmpset_int(cpu_bitmap, &old, new))
 				break;
 		}
 		/*
 		 * A non-zero previous value means some bit was already
 		 * pending, so no new interrupt is sent for this request.
 		 */
 		if (old)
 			return;
 	}
 	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 }
 
 /*
  * Handler for the bitmapped IPI vector.  Atomically fetches and clears
  * the current CPU's pc_ipi_bitmap and services each pending request:
  * IPI_PREEMPT, IPI_AST and IPI_HARDCLOCK.
  *
  * frame: trap frame of the interrupted context, passed by value; its
  *        address is installed as td_intr_frame while the handler runs.
  */
 void
 ipi_bitmap_handler(struct trapframe frame)
 {
 	struct trapframe *oldframe;
 	struct thread *td;
 	int cpu = PCPU_GET(cpuid);
 	u_int ipi_bitmap;
 
 	td = curthread;
 	ipi_bitmap = atomic_readandclear_int(&cpuid_to_pcpu[cpu]->
 	    pc_ipi_bitmap);
 
 	/*
 	 * sched_preempt() must be called to clear the pending preempt
 	 * IPI to enable delivery of further preempts.  However, the
 	 * critical section will cause extra scheduler lock thrashing
 	 * when used unconditionally.  Only critical_enter() if
 	 * hardclock must also run, which requires the section entry.
 	 */
 	if (ipi_bitmap & (1 << IPI_HARDCLOCK))
 		critical_enter();
 
 	/* Save the previous interrupt frame; it is restored before return. */
 	td->td_intr_nesting_level++;
 	oldframe = td->td_intr_frame;
 	td->td_intr_frame = &frame;
 	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
 #ifdef COUNT_IPIS
 		(*ipi_preempt_counts[cpu])++;
 #endif
 		sched_preempt(td);
 	}
 	if (ipi_bitmap & (1 << IPI_AST)) {
 #ifdef COUNT_IPIS
 		(*ipi_ast_counts[cpu])++;
 #endif
 		/* Nothing to do for AST */
 	}
 	if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
 #ifdef COUNT_IPIS
 		(*ipi_hardclock_counts[cpu])++;
 #endif
 		hardclockintr();
 	}
 	td->td_intr_frame = oldframe;
 	td->td_intr_nesting_level--;
 	/* Matches the conditional critical_enter() above. */
 	if (ipi_bitmap & (1 << IPI_HARDCLOCK))
 		critical_exit();
 }
 
 /*
  * Send an IPI to every CPU contained in the given set.
  */
 void
 ipi_selected(cpuset_t cpus, u_int ipi)
 {
 	int target;
 
 	/*
 	 * IPI_STOP_HARD is delivered as an NMI, and the trap handler
 	 * cannot tell where an NMI came from without help.  Record the
 	 * receiving CPUs so that it can.
 	 */
 	if (ipi == IPI_STOP_HARD)
 		CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &cpus);
 
 	/* Drain the local copy of the set one CPU at a time. */
 	for (;;) {
 		target = CPU_FFS(&cpus);
 		if (target == 0)
 			break;
 		/* CPU_FFS() is 1-based. */
 		target--;
 		CPU_CLR(target, &cpus);
 		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, target, ipi);
 		ipi_send_cpu(target, ipi);
 	}
 }
 
 /*
  * Send an IPI to one specific CPU.
  */
 void
 ipi_cpu(int cpu, u_int ipi)
 {
 
 	/*
 	 * IPI_STOP_HARD is delivered as an NMI, and the trap handler
 	 * cannot tell where an NMI came from without help.  Record the
 	 * receiving CPU so that it can.
 	 */
 	if (ipi == IPI_STOP_HARD) {
 		CPU_SET_ATOMIC(cpu, &ipi_stop_nmi_pending);
 	}
 
 	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 	ipi_send_cpu(cpu, ipi);
 }
 
 /*
  * Send an IPI to all CPUs except the one executing this function.
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 	cpuset_t targets;
 
 	targets = all_cpus;
 	CPU_CLR(PCPU_GET(cpuid), &targets);
 
 	if (!IPI_IS_BITMAPED(ipi)) {
 		/*
 		 * IPI_STOP_HARD is delivered as an NMI, and the trap
 		 * handler cannot tell where an NMI came from without
 		 * help.  Record the receiving CPUs so that it can.
 		 */
 		if (ipi == IPI_STOP_HARD)
 			CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &targets);
 
 		CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 		lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 	} else {
 		/* Bitmapped IPIs go through the per-CPU bitmap path. */
 		ipi_selected(targets, ipi);
 	}
 }
 
 int
 ipi_nmi_handler(void)
 {
 	u_int cpuid;
 
 	/*
 	 * As long as there is not a simple way to know about a NMI's
 	 * source, if the bitmask for the current CPU is present in
 	 * the global pending bitword an IPI_STOP_HARD has been issued
 	 * and should be handled.
 	 */
 	cpuid = PCPU_GET(cpuid);
 	if (!CPU_ISSET(cpuid, &ipi_stop_nmi_pending))
 		return (1);
 
 	CPU_CLR_ATOMIC(cpuid, &ipi_stop_nmi_pending);
 	cpustop_handler();
 	return (0);
 }
 
 /* Lock word taken with atomic_cmpset_acq_int() in nmi_call_kdb_smp(). */
 int nmi_kdb_lock;
 
 /*
  * NMI entry point on SMP systems.  The CPU that wins nmi_kdb_lock runs
  * nmi_call_kdb().  Any other CPU saves its context, marks itself as
  * stopped, spins until it can take the lock, and then goes through
  * the normal post-stop handling.  The lock is released in both cases.
  */
 void
 nmi_call_kdb_smp(u_int type, struct trapframe *frame)
 {
 	int self;
 
 	self = PCPU_GET(cpuid);
 
 	if (atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
 		/* Winner: run the NMI handling and drop the lock. */
 		nmi_call_kdb(self, type, frame);
 		atomic_store_rel_int(&nmi_kdb_lock, 0);
 		return;
 	}
 
 	/* Loser: park as a stopped CPU until the lock can be taken. */
 	savectx(&stoppcbs[self]);
 	CPU_SET_ATOMIC(self, &stopped_cpus);
 	while (!atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1))
 		ia32_pause();
 	atomic_store_rel_int(&nmi_kdb_lock, 0);
 	cpustop_handler_post(self);
 }
 
 /*
  * Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
  * if available) until we are resumed.
  *
  * The CPU marks itself in stopped_cpus and waits until its bit appears in
  * started_cpus, then runs cpustop_handler_post().
  */
 void
 cpustop_handler(void)
 {
 	struct monitorbuf *mb;
 	u_int cpu;
 	bool use_mwait;
 
 	cpu = PCPU_GET(cpuid);
 
 	savectx(&stoppcbs[cpu]);
 
 	/*
 	 * MONITOR/MWAIT is used only when enabled by stop_mwait, supported by
 	 * the CPU and not flagged as broken.  "mb" is assigned, and later
 	 * used, only when use_mwait is true.
 	 */
 	use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 &&
 	    !mwait_cpustop_broken);
 	if (use_mwait) {
 		mb = PCPU_PTR(monitorbuf);
 		atomic_store_int(&mb->stop_state,
 		    MONITOR_STOPSTATE_STOPPED);
 	}
 
 	/* Indicate that we are stopped */
 	CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
 	/* Wait for restart */
 	while (!CPU_ISSET(cpu, &started_cpus)) {
 		if (use_mwait) {
 			/*
 			 * Arm the monitor first and re-check stop_state
 			 * before calling cpu_mwait().
 			 */
 			cpu_monitor(mb, 0, 0);
 			if (atomic_load_int(&mb->stop_state) ==
 			    MONITOR_STOPSTATE_STOPPED)
 				cpu_mwait(0, MWAIT_C1);
 			continue;
 		}
 
 		ia32_pause();
 
 		/*
 		 * Halt non-BSP CPUs on panic -- we're never going to need them
 		 * again, and might as well save power / release resources
 		 * (e.g., overprovisioned VM infrastructure).
 		 */
 		while (__predict_false(!IS_BSP() && KERNEL_PANICKED()))
 			halt();
 	}
 
 	cpustop_handler_post(cpu);
 }
 
 /*
  * Common tail for a CPU leaving the stopped state: clear its bits in
  * started_cpus and stopped_cpus, flush the TLB and, on CPU 0 only, run
  * and clear a pending cpustop_restartfunc.
  */
 static void
 cpustop_handler_post(u_int cpu)
 {
 
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 
 	/*
 	 * TLB invalidations are not broadcast to stopped CPUs, so the
 	 * TLB has to be cleared before this CPU resumes.
 	 */
 	invltlb_glob();
 
 #if defined(__amd64__) && defined(DDB)
 	amd64_db_resume_dbreg();
 #endif
 
 	/* Only CPU 0 runs the restart function, and only when one is set. */
 	if (cpu != 0 || cpustop_restartfunc == NULL)
 		return;
 
 	cpustop_restartfunc();
 	cpustop_restartfunc = NULL;
 }
 
 /*
  * Handle an IPI_SUSPEND by saving our current context and spinning until we
  * are resumed.
  *
  * Must be entered without smp_ipi_mtx held (asserted below).  The CPU waits
  * for its bit in toresume_cpus, then reloads microcode, re-runs the per-CPU
  * resume hooks, and finally clears its bits in resuming_cpus, suspended_cpus
  * and toresume_cpus.
  */
 void
 cpususpend_handler(void)
 {
 	u_int cpu;
 
 	mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
 
 	cpu = PCPU_GET(cpuid);
 	/*
 	 * NOTE(review): presumably savectx() returns non-zero when the
 	 * context is first saved and zero when execution re-enters here
 	 * through resumectx -- confirm against the savectx implementation.
 	 */
 	if (savectx(&susppcbs[cpu]->sp_pcb)) {
 #ifdef __amd64__
 		fpususpend(susppcbs[cpu]->sp_fpususpend);
 #else
 		npxsuspend(susppcbs[cpu]->sp_fpususpend);
 #endif
 		/*
 		 * suspended_cpus is cleared shortly after each AP is restarted
 		 * by a Startup IPI, so that the BSP can proceed to restarting
 		 * the next AP.
 		 *
 		 * resuming_cpus gets cleared when the AP completes
 		 * initialization after having been released by the BSP.
 		 * resuming_cpus is probably not the best name for the
 		 * variable, because it is actually a set of processors that
 		 * haven't resumed yet and haven't necessarily started resuming.
 		 *
 		 * Note that suspended_cpus is meaningful only for ACPI suspend
 		 * as it's not really used for Xen suspend since the APs are
 		 * automatically restored to the running state and the correct
 		 * context.  For the same reason resumectx is never called in
 		 * that case.
 		 */
 		CPU_SET_ATOMIC(cpu, &suspended_cpus);
 		CPU_SET_ATOMIC(cpu, &resuming_cpus);
 
 		/*
 		 * Invalidate the cache after setting the global status bits.
 		 * The last AP to set its bit may end up being an Owner of the
 		 * corresponding cache line in MOESI protocol.  The AP may be
 		 * stopped before the cache line is written to the main memory.
 		 */
 		wbinvd();
 	} else {
 		/* Resume path: restore FPU state and re-initialize the CPU. */
 #ifdef __amd64__
 		fpuresume(susppcbs[cpu]->sp_fpususpend);
 #else
 		npxresume(susppcbs[cpu]->sp_fpususpend);
 #endif
 		pmap_init_pat();
 		initializecpu();
 		PCPU_SET(switchtime, 0);
 		PCPU_SET(switchticks, ticks);
 
 		/* Indicate that we have restarted and restored the context. */
 		CPU_CLR_ATOMIC(cpu, &suspended_cpus);
 	}
 
 	/* Wait for resume directive */
 	while (!CPU_ISSET(cpu, &toresume_cpus))
 		ia32_pause();
 
 	/* Re-apply microcode updates. */
 	ucode_reload();
 
 #ifdef __i386__
 	/* Finish removing the identity mapping of low memory for this AP. */
 	invltlb_glob();
 #endif
 
 	/* Optional resume hooks; each is called only when it is set. */
 	if (cpu_ops.cpu_resume)
 		cpu_ops.cpu_resume();
 #ifdef __amd64__
 	if (vmm_resume_p)
 		vmm_resume_p();
 #endif
 
 	/* Resume MCA and local APIC */
 	lapic_xapic_mode();
 	mca_resume();
 	lapic_setup(0);
 
 	/* Indicate that we are resumed */
 	CPU_CLR_ATOMIC(cpu, &resuming_cpus);
 	CPU_CLR_ATOMIC(cpu, &suspended_cpus);
 	CPU_CLR_ATOMIC(cpu, &toresume_cpus);
 }
 
 
 /*
  * IPI handler that writes back and invalidates this CPU's caches with
  * wbinvd, then publishes the generation it observed in the per-CPU
  * smp_tlb_done field so the initiator can see the request as completed.
  */
 void
 invlcache_handler(void)
 {
 	uint32_t generation;
 
 #ifdef COUNT_IPIS
 	(*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
 	/*
 	 * Reading the generation here allows greater parallelism
 	 * since wbinvd is a serializing instruction.  Without the
 	 * temporary, we'd wait for wbinvd to complete, then the read
 	 * would execute, then the dependent write, which must then
 	 * complete before return from interrupt.
 	 */
 	generation = smp_tlb_generation;
 	wbinvd();
 	PCPU_SET(smp_tlb_done, generation);
 }
 
 /*
  * SYSINIT hook (SI_SUB_SMP, SI_ORDER_FIRST): invoked once the rest of the
  * system is up, to let the APs out of the pen.  Sets aps_ready with release
  * semantics and then spins until smp_started becomes non-zero.  Does nothing
  * when only one CPU is present.
  */
 static void
 release_aps(void *dummy __unused)
 {
 
 	if (mp_ncpus != 1) {
 		atomic_store_rel_int(&aps_ready, 1);
 		while (smp_started == 0)
 			ia32_pause();
 	}
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 #ifdef COUNT_IPIS
 /*
  * Register, for every CPU, one named interrupt counter per counted IPI type.
  * Counter names have the form "cpu<N>:<ipi>".
  */
 static void
 mp_ipi_intrcnt(void *dummy)
 {
 	char name[64];
 	int cpu;
 
 /* Format the counter name for this CPU and register its counter slot. */
 #define	IPI_INTRCNT_ADD(fmt, counts) do {				\
 	snprintf(name, sizeof(name), fmt, cpu);				\
 	intrcnt_add(name, &(counts)[cpu]);				\
 } while (0)
 
 	CPU_FOREACH(cpu) {
 		IPI_INTRCNT_ADD("cpu%d:invltlb", ipi_invltlb_counts);
 		IPI_INTRCNT_ADD("cpu%d:invlrng", ipi_invlrng_counts);
 		IPI_INTRCNT_ADD("cpu%d:invlpg", ipi_invlpg_counts);
 		IPI_INTRCNT_ADD("cpu%d:invlcache", ipi_invlcache_counts);
 		IPI_INTRCNT_ADD("cpu%d:preempt", ipi_preempt_counts);
 		IPI_INTRCNT_ADD("cpu%d:ast", ipi_ast_counts);
 		IPI_INTRCNT_ADD("cpu%d:rendezvous", ipi_rendezvous_counts);
 		IPI_INTRCNT_ADD("cpu%d:hardclock", ipi_hardclock_counts);
 	}
 
 #undef IPI_INTRCNT_ADD
 }
 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
 #endif
 
 /*
  * Flush the TLB on other CPUs
  */
 
 /*
  * Variables needed for SMP tlb shootdown.  The initiator fills these in
  * under smp_ipi_mtx before sending the IPI; the IPI handlers read them.
  * smp_tlb_generation is incremented once per request and is the value each
  * target copies into its per-CPU smp_tlb_done when it has finished.
  */
 vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
 pmap_t smp_tlb_pmap;
 volatile uint32_t smp_tlb_generation;
 
 /*
  * On amd64, spell read_eflags() as read_rflags() so the interrupt-flag
  * check in smp_targeted_tlb_shootdown() reads the same on both
  * architectures.
  */
 #ifdef __amd64__
 #define	read_eflags() read_rflags()
 #endif
 
 /*
  * Send the IPI 'vector' to the CPUs in 'mask' (never to the calling CPU)
  * and spin until every target has stored the new smp_tlb_generation value
  * in its pc_smp_tlb_done field.
  *
  * The request parameters (pmap, addr1, addr2) are handed to the handlers
  * through the smp_tlb_* globals, which are written while holding
  * smp_ipi_mtx.  The function returns without sending anything when the
  * debugger is active, the kernel has panicked, or there is no other CPU to
  * signal.  It panics if called with interrupts disabled.
  */
 static void
 smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
     vm_offset_t addr1, vm_offset_t addr2)
 {
 	cpuset_t other_cpus;
 	volatile uint32_t *p_cpudone;
 	uint32_t generation;
 	int cpu;
 
 	/* It is not necessary to signal other CPUs while in the debugger. */
 	if (kdb_active || KERNEL_PANICKED())
 		return;
 
 	/*
 	 * Check for other cpus.  Return if none.
 	 */
 	if (CPU_ISFULLSET(&mask)) {
 		if (mp_ncpus <= 1)
 			return;
 	} else {
 		/* 'mask' is a by-value copy, so editing it is local. */
 		CPU_CLR(PCPU_GET(cpuid), &mask);
 		if (CPU_EMPTY(&mask))
 			return;
 	}
 
 	if (!(read_eflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	smp_tlb_pmap = pmap;
 	generation = ++smp_tlb_generation;
 	if (CPU_ISFULLSET(&mask)) {
 		/* Broadcast, then wait on every CPU except ourselves. */
 		ipi_all_but_self(vector);
 		other_cpus = all_cpus;
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 	} else {
 		/*
 		 * Keep a copy of the targets for the wait loop; the send
 		 * loop below consumes 'mask' one CPU at a time.
 		 */
 		other_cpus = mask;
 		while ((cpu = CPU_FFS(&mask)) != 0) {
 			cpu--;
 			CPU_CLR(cpu, &mask);
 			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
 			    cpu, vector);
 			ipi_send_cpu(cpu, vector);
 		}
 	}
 	/* Wait for each target to report this request's generation. */
 	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
 		cpu--;
 		CPU_CLR(cpu, &other_cpus);
 		p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done;
 		while (*p_cpudone != generation)
 			ia32_pause();
 	}
 	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 /*
  * Request a full TLB invalidation (IPI_INVLTLB) for 'pmap' on the CPUs in
  * 'mask'.  A no-op until smp_started is set.
  */
 void
 smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
 {
 
 	if (!smp_started)
 		return;
 
 	smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_global++;
 #endif
 }
 
 /*
  * Request invalidation of the single page at 'addr' (IPI_INVLPG) for 'pmap'
  * on the CPUs in 'mask'.  A no-op until smp_started is set.
  */
 void
 smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
 {
 
 	if (!smp_started)
 		return;
 
 	smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_page++;
 #endif
 }
 
 /*
  * Request invalidation of the address range starting at 'addr1' and bounded
  * by 'addr2' (IPI_INVLRNG) for 'pmap' on the CPUs in 'mask'.  A no-op until
  * smp_started is set.
  */
 void
 smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
     pmap_t pmap)
 {
 
 	if (!smp_started)
 		return;
 
 	smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_range++;
 	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 #endif
 }
 
 /*
  * Ask every other CPU to run its IPI_INVLCACHE handler, reusing the TLB
  * shootdown machinery with no pmap and no addresses.  A no-op until
  * smp_started is set.
  */
 void
 smp_cache_flush(void)
 {
 
 	if (!smp_started)
 		return;
 
 	smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0);
 }
 
 /*
  * Handlers for TLB related IPIs
  */
 
 /*
  * IPI_INVLTLB handler.  When the request targets kernel_pmap the flush is
  * done with invltlb_glob(); for any other pmap, amd64 uses invltlb() and
  * other builds perform no flush here.  The generation read on entry is then
  * stored in the per-CPU smp_tlb_done field.
  */
 void
 invltlb_handler(void)
 {
 	uint32_t generation;
   
 #ifdef COUNT_XINVLTLB_HITS
 	xhits_gbl[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
 #ifdef COUNT_IPIS
 	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
 	/*
 	 * Reading the generation here allows greater parallelism
 	 * since invalidating the TLB is a serializing operation.
 	 */
 	generation = smp_tlb_generation;
 	if (smp_tlb_pmap == kernel_pmap)
 		invltlb_glob();
 	/* The 'else' arm below exists only on amd64. */
 #ifdef __amd64__
 	else
 		invltlb();
 #endif
 	PCPU_SET(smp_tlb_done, generation);
 }
 
 /*
  * IPI_INVLPG handler.  Invalidates the page at smp_tlb_addr1 and then stores
  * the generation read on entry in the per-CPU smp_tlb_done field.  On i386
  * the invalidation is performed only when the request targets kernel_pmap;
  * on other builds it is unconditional.
  */
 void
 invlpg_handler(void)
 {
 	uint32_t generation;
 
 #ifdef COUNT_XINVLTLB_HITS
 	xhits_pg[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
 #ifdef COUNT_IPIS
 	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
 	generation = smp_tlb_generation;	/* Overlap with serialization */
 	/* The 'if' guard below exists only on i386. */
 #ifdef __i386__
 	if (smp_tlb_pmap == kernel_pmap)
 #endif
 		invlpg(smp_tlb_addr1);
 	PCPU_SET(smp_tlb_done, generation);
 }
 
 /*
  * IPI_INVLRNG handler.  Invalidates pages one PAGE_SIZE step at a time,
  * starting at smp_tlb_addr1 and continuing while the address is below
  * smp_tlb_addr2, then stores the generation read on entry in the per-CPU
  * smp_tlb_done field.  On i386 the invalidation is performed only when the
  * request targets kernel_pmap.
  */
 void
 invlrng_handler(void)
 {
 	vm_offset_t addr, addr2;
 	uint32_t generation;
 
 #ifdef COUNT_XINVLTLB_HITS
 	xhits_rng[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
 #ifdef COUNT_IPIS
 	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
 	/* Copy the request out of the globals before touching the TLB. */
 	addr = smp_tlb_addr1;
 	addr2 = smp_tlb_addr2;
 	generation = smp_tlb_generation;	/* Overlap with serialization */
 #ifdef __i386__
 	if (smp_tlb_pmap == kernel_pmap)
 #endif
 		/* do/while: the first page is always invalidated. */
 		do {
 			invlpg(addr);
 			addr += PAGE_SIZE;
 		} while (addr < addr2);
 
 	PCPU_SET(smp_tlb_done, generation);
 }
Index: head/sys/x86/x86/msi.c
===================================================================
--- head/sys/x86/x86/msi.c	(revision 356939)
+++ head/sys/x86/x86/msi.c	(revision 356940)
@@ -1,756 +1,757 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2006 Yahoo!, Inc.
  * All rights reserved.
  * Written by: John Baldwin <jhb@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Support for PCI Message Signalled Interrupts (MSI).  MSI interrupts on
  * x86 are basically APIC messages that the northbridge delivers directly
  * to the local APICs as if they had come from an I/O APIC.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <x86/apicreg.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 #include <x86/iommu/iommu_intrmap.h>
 #include <machine/specialreg.h>
 #include <dev/pci/pcivar.h>
 
 /* Fields in address for Intel MSI messages. */
 #define	MSI_INTEL_ADDR_DEST		0x000ff000
 #define	MSI_INTEL_ADDR_RH		0x00000008
 # define MSI_INTEL_ADDR_RH_ON		0x00000008
 # define MSI_INTEL_ADDR_RH_OFF		0x00000000
 #define	MSI_INTEL_ADDR_DM		0x00000004
 # define MSI_INTEL_ADDR_DM_PHYSICAL	0x00000000
 # define MSI_INTEL_ADDR_DM_LOGICAL	0x00000004
 
 /* Fields in data for Intel MSI messages. */
 #define	MSI_INTEL_DATA_TRGRMOD		IOART_TRGRMOD	/* Trigger mode. */
 # define MSI_INTEL_DATA_TRGREDG		IOART_TRGREDG
 # define MSI_INTEL_DATA_TRGRLVL		IOART_TRGRLVL
 #define	MSI_INTEL_DATA_LEVEL		0x00004000	/* Polarity. */
 # define MSI_INTEL_DATA_DEASSERT	0x00000000
 # define MSI_INTEL_DATA_ASSERT		0x00004000
 #define	MSI_INTEL_DATA_DELMOD		IOART_DELMOD	/* Delivery mode. */
 # define MSI_INTEL_DATA_DELFIXED	IOART_DELFIXED
 # define MSI_INTEL_DATA_DELLOPRI	IOART_DELLOPRI
 # define MSI_INTEL_DATA_DELSMI		IOART_DELSMI
 # define MSI_INTEL_DATA_DELNMI		IOART_DELNMI
 # define MSI_INTEL_DATA_DELINIT		IOART_DELINIT
 # define MSI_INTEL_DATA_DELEXINT	IOART_DELEXINT
 #define	MSI_INTEL_DATA_INTVEC		IOART_INTVEC	/* Interrupt vector. */
 
 /*
  * Build Intel MSI message and data values from a source.  AMD64 systems
  * seem to be compatible, so we use the same function for both.
  *
  * INTEL_ADDR() places msi_cpu at bit 12, the lowest bit of
  * MSI_INTEL_ADDR_DEST, and selects physical destination mode with the
  * redirection hint off.  INTEL_DATA() requests an edge-triggered,
  * fixed-delivery message carrying msi_vector.  MSI_INTEL_ADDR_BASE is not
  * defined in this file.
  */
 #define	INTEL_ADDR(msi)							\
 	(MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 |			\
 	    MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
 #define	INTEL_DATA(msi)							\
 	(MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
 
 /* Malloc type used for MSI sources and per-group IRQ lists. */
 static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
 
 /*
  * MSI sources are bunched into groups.  This is because MSI forces
  * all of the messages to share the address and data registers and
  * thus certain properties (such as the local APIC ID target on x86).
  * Each group has a 'first' source that contains information global to
  * the group.  These fields are marked with (g) below.
  *
  * Note that local APIC ID is kind of special.  Each message will be
  * assigned an ID by the system; however, a group will use the ID from
  * the first message.
  *
  * For MSI-X, each message is isolated.
  *
  * A source with msi_dev == NULL is treated as free by the allocators.
  */
 struct msi_intsrc {
 	struct intsrc msi_intsrc;
 	device_t msi_dev;		/* Owning device. (g) */
 	struct msi_intsrc *msi_first;	/* First source in group. */
 	u_int msi_irq;			/* IRQ cookie. */
 	u_int msi_msix;			/* MSI-X message. */
 	u_int msi_vector:8;		/* IDT vector. */
 	u_int msi_cpu;			/* Local APIC ID. (g) */
 	u_int msi_count:8;		/* Messages in this group. (g) */
 	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
 	u_int *msi_irqs;		/* Group's IRQ list. (g) */
 	/* Value from iommu_alloc_msi_intr(), passed to map/unmap. */
 	u_int msi_remap_cookie;
 };
 
 static void	msi_create_source(void);
 static void	msi_enable_source(struct intsrc *isrc);
 static void	msi_disable_source(struct intsrc *isrc, int eoi);
 static void	msi_eoi_source(struct intsrc *isrc);
 static void	msi_enable_intr(struct intsrc *isrc);
 static void	msi_disable_intr(struct intsrc *isrc);
 static int	msi_vector(struct intsrc *isrc);
 static int	msi_source_pending(struct intsrc *isrc);
 static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
 static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
 
 /*
  * PIC method table shared by all MSI and MSI-X sources.  No suspend,
  * resume, or reprogram_pin method is provided.
  */
 struct pic msi_pic = {
 	.pic_enable_source = msi_enable_source,
 	.pic_disable_source = msi_disable_source,
 	.pic_eoi_source = msi_eoi_source,
 	.pic_enable_intr = msi_enable_intr,
 	.pic_disable_intr = msi_disable_intr,
 	.pic_vector = msi_vector,
 	.pic_source_pending = msi_source_pending,
 	.pic_suspend = NULL,
 	.pic_resume = NULL,
 	.pic_config_intr = msi_config_intr,
 	.pic_assign_cpu = msi_assign_cpu,
 	.pic_reprogram_pin = NULL,
 };
 
 /* First IRQ of the MSI range; assigned from num_io_irqs in msi_init(). */
 u_int first_msi_irq;
 SYSCTL_UINT(_machdep, OID_AUTO, first_msi_irq, CTLFLAG_RD, &first_msi_irq, 0,
     "Number of first IRQ reserved for MSI and MSI-X interrupts");
 
 /* Size of the MSI range; a value of 0 makes msi_init() leave MSI disabled. */
 u_int num_msi_irqs = 512;
 SYSCTL_UINT(_machdep, OID_AUTO, num_msi_irqs, CTLFLAG_RDTUN, &num_msi_irqs, 0,
     "Number of IRQs reserved for MSI and MSI-X interrupts");
 
 #ifdef SMP
 /**
  * Xen hypervisors prior to 4.6.0 do not properly handle updates to
  * enabled MSI-X table entries.  Allow migration of MSI-X interrupts
  * to be disabled via a tunable. Values have the following meaning:
  *
  * -1: automatic detection by FreeBSD
  *  0: enable migration
  *  1: disable migration
  */
 int msix_disable_migration = -1;
 SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN,
     &msix_disable_migration, 0,
     "Disable migration of MSI-X interrupts between CPUs");
 #endif
 
 /* Set to 1 at the end of msi_init(); the allocators fail with ENXIO if 0. */
 static int msi_enabled;
 /* Number of sources created so far by msi_create_source(). */
 static u_int msi_last_irq;
 /* Mutex held around the source scans and updates in this file. */
 static struct mtx msi_lock;
 
 /* pic_enable_source method: does nothing for MSI sources. */
 static void
 msi_enable_source(struct intsrc *isrc)
 {
 }
 
 /*
  * pic_disable_source method.  The source itself is left alone; the only
  * action is a local APIC EOI when the caller passes PIC_EOI.
  */
 static void
 msi_disable_source(struct intsrc *isrc, int eoi)
 {
 
 	if (eoi != PIC_EOI)
 		return;
 	lapic_eoi();
 }
 
 /* pic_eoi_source method: signal end-of-interrupt to the local APIC. */
 static void
 msi_eoi_source(struct intsrc *isrc)
 {
 
 	lapic_eoi();
 }
 
 /*
  * pic_enable_intr method: enable the source's IDT vector on the local APIC
  * it is currently routed to.
  */
 static void
 msi_enable_intr(struct intsrc *isrc)
 {
 	struct msi_intsrc *src;
 
 	src = (struct msi_intsrc *)isrc;
 	apic_enable_vector(src->msi_cpu, src->msi_vector);
 }
 
 /*
  * pic_disable_intr method: disable the source's IDT vector on the local
  * APIC it is currently routed to.
  */
 static void
 msi_disable_intr(struct intsrc *isrc)
 {
 	struct msi_intsrc *src;
 
 	src = (struct msi_intsrc *)isrc;
 	apic_disable_vector(src->msi_cpu, src->msi_vector);
 }
 
 /*
  * pic_vector method.  Returns the source's IRQ cookie (msi_irq), not the
  * IDT vector kept in the msi_vector field.
  */
 static int
 msi_vector(struct intsrc *isrc)
 {
 	struct msi_intsrc *src;
 
 	src = (struct msi_intsrc *)isrc;
 	return (src->msi_irq);
 }
 
 /* pic_source_pending method: always reports that nothing is pending. */
 static int
 msi_source_pending(struct intsrc *isrc)
 {
 
 	return (0);
 }
 
 /*
  * pic_config_intr method: trigger mode and polarity cannot be changed
  * through this PIC, so every request fails with ENODEV.
  */
 static int
 msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol)
 {
 
 	return (ENODEV);
 }
 
 /*
  * pic_assign_cpu method: move the MSI group (or single MSI-X message)
  * headed by 'isrc' to the local APIC 'apic_id'.
  *
  * Returns EINVAL if 'isrc' is not the first message of its group or if
  * MSI-X migration is disabled for an MSI-X message, 0 if the source is
  * already routed to 'apic_id' or was moved, and ENOSPC if no IDT vectors
  * could be allocated on the new CPU.
  */
 static int
 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
 	int old_vector;
 	u_int old_id;
 	int i, vector;
 
 	/*
 	 * Only allow CPUs to be assigned to the first message for an
 	 * MSI group.
 	 */
 	if (msi->msi_first != msi)
 		return (EINVAL);
 
 #ifdef SMP
 	if (msix_disable_migration && msi->msi_msix)
 		return (EINVAL);
 #endif
 
 	/* Store information to free existing irq. */
 	old_vector = msi->msi_vector;
 	old_id = msi->msi_cpu;
 	if (old_id == apic_id)
 		return (0);
 
 	/* Allocate IDT vectors on this cpu. */
 	if (msi->msi_count > 1) {
 		KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
 		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
 		    msi->msi_count, msi->msi_maxcount);
 	} else
 		vector = apic_alloc_vector(apic_id, msi->msi_irq);
 	if (vector == 0)
 		return (ENOSPC);
 
 	/* Retarget the first message, enabling it only if it has handlers. */
 	msi->msi_cpu = apic_id;
 	msi->msi_vector = vector;
 	if (msi->msi_intsrc.is_handlers > 0)
 		apic_enable_vector(msi->msi_cpu, msi->msi_vector);
 	if (bootverbose)
 		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
 		    msi->msi_cpu, msi->msi_vector);
 	/* Siblings take the consecutive vectors after the first one. */
 	for (i = 1; i < msi->msi_count; i++) {
 		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
 		sib->msi_cpu = apic_id;
 		sib->msi_vector = vector + i;
 		if (sib->msi_intsrc.is_handlers > 0)
 			apic_enable_vector(sib->msi_cpu, sib->msi_vector);
 		if (bootverbose)
 			printf(
 		    "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
 			    sib->msi_irq, sib->msi_cpu, sib->msi_vector);
 	}
 	BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev,
 	    msi->msi_irq);
 
 	/*
 	 * Free the old vector after the new one is established.  This is done
 	 * to prevent races where we could miss an interrupt.
 	 */
 	if (msi->msi_intsrc.is_handlers > 0)
 		apic_disable_vector(old_id, old_vector);
 	apic_free_vector(old_id, old_vector, msi->msi_irq);
 	for (i = 1; i < msi->msi_count; i++) {
 		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
 		if (sib->msi_intsrc.is_handlers > 0)
 			apic_disable_vector(old_id, old_vector + i);
 		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
 	}
 	return (0);
 }
 
 /*
  * One-time MSI setup.  On a supported CPU, and when num_msi_irqs is
  * non-zero, reserves the IRQ range [first_msi_irq, first_msi_irq +
  * num_msi_irqs) directly after the existing I/O IRQs, registers msi_pic,
  * and initializes msi_lock.  Otherwise returns with MSI left disabled.
  */
 void
 msi_init(void)
 {
 
 	/* Check if we have a supported CPU. */
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_INTEL:
 	case CPU_VENDOR_AMD:
+	case CPU_VENDOR_HYGON:
 		break;
 	case CPU_VENDOR_CENTAUR:
 		/* Centaur: only family 6, model 0xf and later. */
 		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 		    CPUID_TO_MODEL(cpu_id) >= 0xf)
 			break;
 		/* FALLTHROUGH */
 	default:
 		return;
 	}
 
 #ifdef SMP
 	if (msix_disable_migration == -1) {
 		/* The default is to allow migration of MSI-X interrupts. */
 		msix_disable_migration = 0;
 	}
 #endif
 
 	if (num_msi_irqs == 0)
 		return;
 
 	/* Guard the addition below against u_int wrap-around. */
 	first_msi_irq = num_io_irqs;
 	if (num_msi_irqs > UINT_MAX - first_msi_irq)
 		panic("num_msi_irqs too high");
 	num_io_irqs = first_msi_irq + num_msi_irqs;
 
 	msi_enabled = 1;
 	intr_register_pic(&msi_pic);
 	mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
 }
 
 /*
  * Create and register one more MSI source, using the next unused IRQ of the
  * MSI range.  Silently does nothing once num_msi_irqs sources exist.  The
  * IRQ number is claimed under msi_lock; the allocation and registration are
  * done after the lock has been dropped.
  */
 static void
 msi_create_source(void)
 {
 	struct msi_intsrc *src;
 	u_int irq;
 	int exhausted;
 
 	irq = 0;
 	mtx_lock(&msi_lock);
 	exhausted = (msi_last_irq >= num_msi_irqs);
 	if (!exhausted) {
 		irq = msi_last_irq + first_msi_irq;
 		msi_last_irq++;
 	}
 	mtx_unlock(&msi_lock);
 	if (exhausted)
 		return;
 
 	src = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
 	src->msi_irq = irq;
 	src->msi_intsrc.is_pic = &msi_pic;
 	intr_register_source(&src->msi_intsrc);
 	nexus_add_irq(irq);
 }
 
 /*
  * Try to allocate 'count' interrupt sources with contiguous IDT values.
  *
  * On success, returns 0 with the selected IRQ numbers stored in
  * irqs[0 .. count - 1]; every selected source is owned by 'dev' and uses
  * the source for irqs[0] as the head of its group.
  *
  * Returns ENXIO if MSI is not enabled or the request does not fit in the
  * reserved IRQ range, ENOSPC if apic_alloc_vectors() cannot supply the
  * vectors, or (ACPI_DMAR only) the error from iommu_alloc_msi_intr().
  * msi_lock is released on every return path.
  */
 int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
 	u_int cpu, domain, *mirqs;
 	int cnt, i, vector;
 #ifdef ACPI_DMAR
 	u_int cookies[count];
 	int error;
 #endif
 
 	if (!msi_enabled)
 		return (ENXIO);
 
 	if (bus_get_domain(dev, &domain) != 0)
 		domain = 0;
 
 	/* A per-group IRQ list is only kept for multi-message groups. */
 	if (count > 1)
 		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
 	else
 		mirqs = NULL;
 again:
 	mtx_lock(&msi_lock);
 
 	/* Try to find 'count' free IRQs. */
 	cnt = 0;
 	for (i = first_msi_irq; i < first_msi_irq + num_msi_irqs; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(i);
 
 		/* End of allocated sources, so break. */
 		if (msi == NULL)
 			break;
 
 		/* If this is a free one, save its IRQ in the array. */
 		if (msi->msi_dev == NULL) {
 			irqs[cnt] = i;
 			cnt++;
 			if (cnt == count)
 				break;
 		}
 	}
 
 	/* Do we need to create some new sources? */
 	if (cnt < count) {
 		/* If we would exceed the max, give up. */
 		if (i + (count - cnt) > first_msi_irq + num_msi_irqs) {
 			mtx_unlock(&msi_lock);
 			free(mirqs, M_MSI);
 			return (ENXIO);
 		}
 		mtx_unlock(&msi_lock);
 
 		/* We need count - cnt more sources. */
 		while (cnt < count) {
 			msi_create_source();
 			cnt++;
 		}
 		/* Rescan from scratch now that more sources exist. */
 		goto again;
 	}
 
 	/* Ok, we now have the IRQs allocated. */
 	KASSERT(cnt == count, ("count mismatch"));
 
 	/* Allocate 'count' IDT vectors. */
 	cpu = intr_next_cpu(domain);
 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
 		free(mirqs, M_MSI);
 		return (ENOSPC);
 	}
 
 #ifdef ACPI_DMAR
 	/*
 	 * NOTE(review): msi_lock is dropped around the IOMMU call while the
 	 * chosen sources still have msi_dev == NULL, so a concurrent
 	 * allocator could apparently select the same sources -- confirm
 	 * whether callers serialize allocations.
 	 */
 	mtx_unlock(&msi_lock);
 	error = iommu_alloc_msi_intr(dev, cookies, count);
 	mtx_lock(&msi_lock);
 	if (error == EOPNOTSUPP)
 		error = 0;
 	if (error != 0) {
 		for (i = 0; i < count; i++)
 			apic_free_vector(cpu, vector + i, irqs[i]);
 		/* The lock was re-taken above; do not return holding it. */
 		mtx_unlock(&msi_lock);
 		free(mirqs, M_MSI);
 		return (error);
 	}
 	for (i = 0; i < count; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
 		msi->msi_remap_cookie = cookies[i];
 	}
 #endif
 
 	/* Assign IDT vectors and make these messages owned by 'dev'. */
 	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
 	for (i = 0; i < count; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
 		msi->msi_cpu = cpu;
 		msi->msi_dev = dev;
 		msi->msi_vector = vector + i;
 		if (bootverbose)
 			printf(
 		    "msi: routing MSI IRQ %d to local APIC %u vector %u\n",
 			    msi->msi_irq, msi->msi_cpu, msi->msi_vector);
 		msi->msi_first = fsrc;
 		KASSERT(msi->msi_intsrc.is_handlers == 0,
 		    ("dead MSI has handlers"));
 	}
 	/* Group-wide fields live on the first source only. */
 	fsrc->msi_count = count;
 	fsrc->msi_maxcount = maxcount;
 	if (count > 1)
 		bcopy(irqs, mirqs, count * sizeof(*mirqs));
 	fsrc->msi_irqs = mirqs;
 	mtx_unlock(&msi_lock);
 	return (0);
 }
 
 /*
  * Release the whole MSI group whose IRQs are listed in irqs[0 .. count - 1],
  * with irqs[0] being the first message of the group.
  *
  * Returns 0 on success, ENOENT if irqs[0] has no source, ENXIO if that
  * source is not allocated to a group, and EINVAL if it is an MSI-X message,
  * is not the first message of its group, or 'count' differs from the
  * group's message count.
  */
 int
 msi_release(int *irqs, int count)
 {
 	struct msi_intsrc *msi, *first;
 	int i;
 
 	mtx_lock(&msi_lock);
 	first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
 	if (first == NULL) {
 		mtx_unlock(&msi_lock);
 		return (ENOENT);
 	}
 
 	/* Make sure this isn't an MSI-X message. */
 	if (first->msi_msix) {
 		mtx_unlock(&msi_lock);
 		return (EINVAL);
 	}
 
 	/* Make sure this message is allocated to a group. */
 	if (first->msi_first == NULL) {
 		mtx_unlock(&msi_lock);
 		return (ENXIO);
 	}
 
 	/*
 	 * Make sure this is the start of a group and that we are releasing
 	 * the entire group.
 	 */
 	if (first->msi_first != first || first->msi_count != count) {
 		mtx_unlock(&msi_lock);
 		return (EINVAL);
 	}
 	KASSERT(first->msi_dev != NULL, ("unowned group"));
 
 	/* Clear all the extra messages in the group. */
 	for (i = 1; i < count; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
 		KASSERT(msi->msi_first == first, ("message not in group"));
 		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
 #ifdef ACPI_DMAR
 		/*
 		 * NOTE(review): this call is made with msi_lock held, while
 		 * the same call for the first message below drops the lock
 		 * first -- confirm which is intended.
 		 */
 		iommu_unmap_msi_intr(first->msi_dev, msi->msi_remap_cookie);
 #endif
 		msi->msi_first = NULL;
 		msi->msi_dev = NULL;
 		apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
 		msi->msi_vector = 0;
 	}
 
 	/* Clear out the first message. */
 #ifdef ACPI_DMAR
 	mtx_unlock(&msi_lock);
 	iommu_unmap_msi_intr(first->msi_dev, first->msi_remap_cookie);
 	mtx_lock(&msi_lock);
 #endif
 	first->msi_first = NULL;
 	first->msi_dev = NULL;
 	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
 	first->msi_vector = 0;
 	first->msi_count = 0;
 	first->msi_maxcount = 0;
 	free(first->msi_irqs, M_MSI);
 	first->msi_irqs = NULL;
 
 	mtx_unlock(&msi_lock);
 	return (0);
 }
 
 /*
  * Compute the message address and data for 'irq' and store them in *addr
  * and *data.  For a plain MSI message the values are those of the first
  * message of its group.
  *
  * Returns 0 on success, ENOENT if 'irq' has no source, ENXIO if the source
  * is not owned by a device or (for MSI) not part of a group, or (ACPI_DMAR
  * only) an error from iommu_map_msi_intr().  When interrupt remapping is
  * not compiled in, or reports EOPNOTSUPP, the values are built with
  * INTEL_ADDR() and INTEL_DATA().
  */
 int
 msi_map(int irq, uint64_t *addr, uint32_t *data)
 {
 	struct msi_intsrc *msi;
 	int error;
 #ifdef ACPI_DMAR
 	struct msi_intsrc *msi1;
 	int i, k;
 #endif
 
 	mtx_lock(&msi_lock);
 	msi = (struct msi_intsrc *)intr_lookup_source(irq);
 	if (msi == NULL) {
 		mtx_unlock(&msi_lock);
 		return (ENOENT);
 	}
 
 	/* Make sure this message is allocated to a device. */
 	if (msi->msi_dev == NULL) {
 		mtx_unlock(&msi_lock);
 		return (ENXIO);
 	}
 
 	/*
 	 * If this message isn't an MSI-X message, make sure it's part
 	 * of a group, and switch to the first message in the
 	 * group.
 	 */
 	if (!msi->msi_msix) {
 		if (msi->msi_first == NULL) {
 			mtx_unlock(&msi_lock);
 			return (ENXIO);
 		}
 		msi = msi->msi_first;
 	}
 
 #ifdef ACPI_DMAR
 	if (!msi->msi_msix) {
 		/*
 		 * Map every other member of the group first; k counts the
 		 * members still to be found and ends the scan early.  The
 		 * lock is dropped around each IOMMU call.
 		 */
 		for (k = msi->msi_count - 1, i = first_msi_irq; k > 0 &&
 		    i < first_msi_irq + num_msi_irqs; i++) {
 			if (i == msi->msi_irq)
 				continue;
 			msi1 = (struct msi_intsrc *)intr_lookup_source(i);
 			if (!msi1->msi_msix && msi1->msi_first == msi) {
 				mtx_unlock(&msi_lock);
 				iommu_map_msi_intr(msi1->msi_dev,
 				    msi1->msi_cpu, msi1->msi_vector,
 				    msi1->msi_remap_cookie, NULL, NULL);
 				k--;
 				mtx_lock(&msi_lock);
 			}
 		}
 	}
 	mtx_unlock(&msi_lock);
 	error = iommu_map_msi_intr(msi->msi_dev, msi->msi_cpu,
 	    msi->msi_vector, msi->msi_remap_cookie, addr, data);
 #else
 	mtx_unlock(&msi_lock);
 	error = EOPNOTSUPP;
 #endif
 	/* No remapping available: compose the message directly. */
 	if (error == EOPNOTSUPP) {
 		*addr = INTEL_ADDR(msi);
 		*data = INTEL_DATA(msi);
 		error = 0;
 	}
 	return (error);
 }
 
 /*
  * Allocate a single MSI-X message for 'dev'.
  *
  * On success, returns 0 and stores the IRQ number in *irq; the source is
  * owned by 'dev' and forms a group of one.
  *
  * Returns ENXIO if MSI is not enabled or every IRQ in the reserved range is
  * in use, ENOSPC if apic_alloc_vector() cannot supply a vector, or
  * (ACPI_DMAR only) the error from iommu_alloc_msi_intr().  msi_lock is
  * released on every return path.
  */
 int
 msix_alloc(device_t dev, int *irq)
 {
 	struct msi_intsrc *msi;
 	u_int cpu, domain;
 	int i, vector;
 #ifdef ACPI_DMAR
 	u_int cookie;
 	int error;
 #endif
 
 	if (!msi_enabled)
 		return (ENXIO);
 
 	if (bus_get_domain(dev, &domain) != 0)
 		domain = 0;
 
 again:
 	mtx_lock(&msi_lock);
 
 	/* Find a free IRQ. */
 	for (i = first_msi_irq; i < first_msi_irq + num_msi_irqs; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(i);
 
 		/* End of allocated sources, so break. */
 		if (msi == NULL)
 			break;
 
 		/* Stop at the first free source. */
 		if (msi->msi_dev == NULL)
 			break;
 	}
 
 	/* Are all IRQs in use? */
 	if (i == first_msi_irq + num_msi_irqs) {
 		mtx_unlock(&msi_lock);
 		return (ENXIO);
 	}
 
 	/* Do we need to create a new source? */
 	if (msi == NULL) {
 		mtx_unlock(&msi_lock);
 
 		/* Create a new source. */
 		msi_create_source();
 		goto again;
 	}
 
 	/* Allocate an IDT vector. */
 	cpu = intr_next_cpu(domain);
 	vector = apic_alloc_vector(cpu, i);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
 		return (ENOSPC);
 	}
 
 	/*
 	 * Claim the source before the lock may be dropped below, so that it
 	 * no longer looks free to a concurrent allocator.
 	 */
 	msi->msi_dev = dev;
 #ifdef ACPI_DMAR
 	mtx_unlock(&msi_lock);
 	error = iommu_alloc_msi_intr(dev, &cookie, 1);
 	mtx_lock(&msi_lock);
 	if (error == EOPNOTSUPP)
 		error = 0;
 	if (error != 0) {
 		msi->msi_dev = NULL;
 		apic_free_vector(cpu, vector, i);
 		/* The lock was re-taken above; do not return holding it. */
 		mtx_unlock(&msi_lock);
 		return (error);
 	}
 	msi->msi_remap_cookie = cookie;
 #endif
 
 	if (bootverbose)
 		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_irq, cpu, vector);
 
 	/* Setup source. */
 	msi->msi_cpu = cpu;
 	msi->msi_first = msi;
 	msi->msi_vector = vector;
 	msi->msi_msix = 1;
 	msi->msi_count = 1;
 	msi->msi_maxcount = 1;
 	msi->msi_irqs = NULL;
 
 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
 	mtx_unlock(&msi_lock);
 
 	*irq = i;
 	return (0);
 }
 
 /*
  * Release the MSI-X message 'irq': unmap it from the IOMMU (ACPI_DMAR
  * only), free its IDT vector, and reset the source so that it is free again.
  *
  * Returns 0 on success, ENOENT if 'irq' has no source, and EINVAL if the
  * source is not an MSI-X message.
  */
 int
 msix_release(int irq)
 {
 	struct msi_intsrc *msi;
 
 	mtx_lock(&msi_lock);
 	msi = (struct msi_intsrc *)intr_lookup_source(irq);
 	if (msi == NULL) {
 		mtx_unlock(&msi_lock);
 		return (ENOENT);
 	}
 
 	/* Make sure this is an MSI-X message. */
 	if (!msi->msi_msix) {
 		mtx_unlock(&msi_lock);
 		return (EINVAL);
 	}
 
 	KASSERT(msi->msi_dev != NULL, ("unowned message"));
 
 	/* Clear out the message. */
 #ifdef ACPI_DMAR
 	/* The lock is dropped around the IOMMU call and re-taken after. */
 	mtx_unlock(&msi_lock);
 	iommu_unmap_msi_intr(msi->msi_dev, msi->msi_remap_cookie);
 	mtx_lock(&msi_lock);
 #endif
 	msi->msi_first = NULL;
 	msi->msi_dev = NULL;
 	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
 	msi->msi_vector = 0;
 	msi->msi_msix = 0;
 	msi->msi_count = 0;
 	msi->msi_maxcount = 0;
 
 	mtx_unlock(&msi_lock);
 	return (0);
 }
Index: head/sys/x86/x86/tsc.c
===================================================================
--- head/sys/x86/x86/tsc.c	(revision 356939)
+++ head/sys/x86/x86/tsc.c	(revision 356940)
@@ -1,829 +1,832 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1998-2003 Poul-Henning Kamp
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_clock.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/timetc.h>
 #include <sys/kernel.h>
 #include <sys/power.h>
 #include <sys/smp.h>
 #include <sys/vdso.h>
 #include <machine/clock.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #include <x86/vmware.h>
 #include <dev/acpica/acpi_hpet.h>
 #include <contrib/dev/acpica/include/acpi.h>
 
 #include "cpufreq_if.h"
 
 /* TSC frequency; printed as Hz by probe_tsc_freq(), 0 when not known. */
 uint64_t	tsc_freq;
 /* Non-zero once probe_tsc_freq()/tsc_freq_vmware() deem the TSC invariant. */
 int		tsc_is_invariant;
 /* Set to 1 when the MPERF/APERF MSRs were observed to count. */
 int		tsc_perf_stat;
 
 /* Event handler tags filled in by init_TSC() for the cpufreq events. */
 static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
 
 SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
     &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
 
 #ifdef SMP
 /* Result of / override for the SMP synchronization test in test_tsc(). */
 int	smp_tsc;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
     "Indicates whether the TSC is safe to use in SMP mode");
 
 /* Maximum number of adjust-and-retry passes test_tsc() may make at boot. */
 int	smp_tsc_adjust = 0;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
     &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
 #endif
 
 /* init_TSC_tc() shifts its UINT_MAX frequency ceiling right by this. */
 static int	tsc_shift = 1;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
     &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");
 
 /* When non-zero, init_TSC() and init_TSC_tc() return without doing work. */
 static int	tsc_disabled;
 SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
     "Disable x86 Time Stamp Counter");
 
 /*
  * Declared CTLFLAG_NOFETCH: the tunable is fetched explicitly with
  * TUNABLE_INT_FETCH() in probe_tsc_freq(), which needs to know whether
  * the user supplied a value at all.
  */
 static int	tsc_skip_calibration;
 SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN |
     CTLFLAG_NOFETCH, &tsc_skip_calibration, 0,
     "Disable TSC frequency calibration");
 
 /*
  * Forward declarations for the static cpufreq event handlers, timecounter
  * read routines and vDSO fill routines defined further down in this file.
  */
 static void tsc_freq_changed(void *arg, const struct cf_level *level,
     int status);
 static void tsc_freq_changing(void *arg, const struct cf_level *level,
     int *status);
 static unsigned tsc_get_timecount(struct timecounter *tc);
 static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
 static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
 static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
 static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
 static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
 static void tsc_levels_changed(void *arg, int unit);
 static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
     struct timecounter *tc);
 #ifdef COMPAT_FREEBSD32
 static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
     struct timecounter *tc);
 #endif
 
 /*
  * Timecounter descriptor for the TSC.  The read handler, name and quality
  * given here are only starting values: probe_tsc_freq() and init_TSC_tc()
  * may replace tc_get_timecount, tc_name and tc_quality, and init_TSC_tc()
  * fills in tc_frequency and tc_priv (the shift count) before tc_init().
  */
 static struct timecounter tsc_timecounter = {
 	.tc_get_timecount =		tsc_get_timecount,
 	.tc_counter_mask =		~0u,
 	.tc_name =			"TSC",
 	.tc_quality =			800,	/* adjusted in code */
 	.tc_fill_vdso_timehands = 	x86_tsc_vdso_timehands,
 #ifdef COMPAT_FREEBSD32
 	.tc_fill_vdso_timehands32 = 	x86_tsc_vdso_timehands32,
 #endif
 };
 
 static void
 tsc_freq_vmware(void)
 {
 	u_int regs[4];
 
 	if (hv_high >= 0x40000010) {
 		do_cpuid(0x40000010, regs);
 		tsc_freq = regs[0] * 1000;
 	} else {
 		vmware_hvcall(VMW_HVCMD_GETHZ, regs);
 		if (regs[1] != UINT_MAX)
 			tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
 	}
 	tsc_is_invariant = 1;
 }
 
 /*
  * Calculate TSC frequency using information from the CPUID leaf 0x15
  * 'Time Stamp Counter and Nominal Core Crystal Clock'.  If leaf 0x15
  * is not functional, as it is on Skylake/Kabylake, try 0x16 'Processor
  * Frequency Information'.  Leaf 0x16 is described in the SDM as
  * informational only, but if 0x15 did not work, and TSC calibration
  * is disabled, it is the best we can get at all.  It should still be
  * an improvement over the parsing of the CPU model name in
  * tsc_freq_intel(), when available.
  */
 static bool
 tsc_freq_cpuid(void)
 {
 	u_int regs[4];
 
 	if (cpu_high < 0x15)
 		return (false);
 	do_cpuid(0x15, regs);
 	if (regs[0] != 0 && regs[1] != 0 && regs[2] != 0) {
 		tsc_freq = (uint64_t)regs[2] * regs[1] / regs[0];
 		return (true);
 	}
 
 	if (cpu_high < 0x16)
 		return (false);
 	do_cpuid(0x16, regs);
 	if (regs[0] != 0) {
 		tsc_freq = (uint64_t)regs[0] * 1000000;
 		return (true);
 	}
 
 	return (false);
 }
 
 static void
 tsc_freq_intel(void)
 {
 	char brand[48];
 	u_int regs[4];
 	uint64_t freq;
 	char *p;
 	u_int i;
 
 	/*
 	 * Intel Processor Identification and the CPUID Instruction
 	 * Application Note 485.
 	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
 	 */
 	if (cpu_exthigh >= 0x80000004) {
 		p = brand;
 		for (i = 0x80000002; i < 0x80000005; i++) {
 			do_cpuid(i, regs);
 			memcpy(p, regs, sizeof(regs));
 			p += sizeof(regs);
 		}
 		p = NULL;
 		for (i = 0; i < sizeof(brand) - 1; i++)
 			if (brand[i] == 'H' && brand[i + 1] == 'z')
 				p = brand + i;
 		if (p != NULL) {
 			p -= 5;
 			switch (p[4]) {
 			case 'M':
 				i = 1;
 				break;
 			case 'G':
 				i = 1000;
 				break;
 			case 'T':
 				i = 1000000;
 				break;
 			default:
 				return;
 			}
 #define	C2D(c)	((c) - '0')
 			if (p[1] == '.') {
 				freq = C2D(p[0]) * 1000;
 				freq += C2D(p[2]) * 100;
 				freq += C2D(p[3]) * 10;
 				freq *= i * 1000;
 			} else {
 				freq = C2D(p[0]) * 1000;
 				freq += C2D(p[1]) * 100;
 				freq += C2D(p[2]) * 10;
 				freq += C2D(p[3]);
 				freq *= i * 1000000;
 			}
 #undef C2D
 			tsc_freq = freq;
 		}
 	}
 }
 
 /*
  * Determine TSC properties and the TSC frequency at boot.
  *
  * - Sets tsc_perf_stat if CPUID advertises MPERF/APERF and both MSRs are
  *   observed to advance.
  * - VMware guests take the frequency from the hypervisor and return.
  * - Per CPU vendor, sets tsc_is_invariant from feature/family/model
  *   checks and, when SSE2 is present, installs a fenced tc_get_timecount
  *   handler (MFENCE for AMD/Hygon, LFENCE for Intel and Centaur).
  * - Fills in tsc_freq either from CPUID / the brand string (calibration
  *   skipped) or from the TSC delta measured across DELAY(1000000).
  */
 static void
 probe_tsc_freq(void)
 {
 	uint64_t tsc1, tsc2;
 	uint16_t bootflags;
 
 	if (cpu_power_ecx & CPUID_PERF_STAT) {
 		/*
 		 * XXX Some emulators expose host CPUID without actual support
 		 * for these MSRs.  We must test whether they really work.
 		 */
 		wrmsr(MSR_MPERF, 0);
 		wrmsr(MSR_APERF, 0);
 		DELAY(10);
 		if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
 			tsc_perf_stat = 1;
 	}
 
 	if (vm_guest == VM_GUEST_VMWARE) {
 		tsc_freq_vmware();
 		return;
 	}
 
 	/* Vendor-specific invariance detection and fence selection. */
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_AMD:
+	case CPU_VENDOR_HYGON:
 		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
 		    (vm_guest == VM_GUEST_NO &&
 		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
 			tsc_is_invariant = 1;
 		if (cpu_feature & CPUID_SSE2) {
 			tsc_timecounter.tc_get_timecount =
 			    tsc_get_timecount_mfence;
 		}
 		break;
 	case CPU_VENDOR_INTEL:
 		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
 		    (vm_guest == VM_GUEST_NO &&
 		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
 		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
 		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
 			tsc_is_invariant = 1;
 		if (cpu_feature & CPUID_SSE2) {
 			tsc_timecounter.tc_get_timecount =
 			    tsc_get_timecount_lfence;
 		}
 		break;
 	case CPU_VENDOR_CENTAUR:
 		if (vm_guest == VM_GUEST_NO &&
 		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
 		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
 			tsc_is_invariant = 1;
 		if (cpu_feature & CPUID_SSE2) {
 			tsc_timecounter.tc_get_timecount =
 			    tsc_get_timecount_lfence;
 		}
 		break;
 	}
 
 	if (!TUNABLE_INT_FETCH("machdep.disable_tsc_calibration",
 	    &tsc_skip_calibration)) {
 		/*
 		 * User did not give the order about calibration.
 		 * If he did, we do not try to guess.
 		 *
 		 * Otherwise, if ACPI FADT reports that the platform
 		 * is legacy-free and CPUID provides TSC frequency,
 		 * use it.  The calibration could fail anyway since
 		 * ISA timer can be absent or power gated.
 		 */
 		if (acpi_get_fadt_bootflags(&bootflags) &&
 		    (bootflags & ACPI_FADT_LEGACY_DEVICES) == 0 &&
 		    tsc_freq_cpuid()) {
 			printf("Skipping TSC calibration since no legacy "
 			    "devices reported by FADT and CPUID works\n");
 			tsc_skip_calibration = 1;
 		}
 	}
 	if (tsc_skip_calibration) {
 		/*
 		 * Prefer CPUID; on Intel fall back to the brand string.
 		 * tsc_freq stays as it was if neither source yields a value.
 		 */
 		if (tsc_freq_cpuid())
 			;
 		else if (cpu_vendor_id == CPU_VENDOR_INTEL)
 			tsc_freq_intel();
 	} else {
 		if (bootverbose)
 			printf("Calibrating TSC clock ... ");
 		/* The TSC delta across the delay is taken as the frequency. */
 		tsc1 = rdtsc();
 		DELAY(1000000);
 		tsc2 = rdtsc();
 		tsc_freq = tsc2 - tsc1;
 	}
 	if (bootverbose)
 		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
 }
 
 /*
  * Boot-time TSC setup: probe the frequency, hand the TSC to the CPU
  * ticker and, unless the TSC is invariant, subscribe to the cpufreq
  * events so frequency changes can be tracked or vetoed.
  *
  * Does nothing when the CPU has no TSC or the TSC has been disabled.
  * On i386 it also bails out for the CPU models listed below.
  */
 void
 init_TSC(void)
 {
 
 	if (tsc_disabled || (cpu_feature & CPUID_TSC) == 0)
 		return;
 
 #ifdef __i386__
 	/* The TSC is known to be broken on certain CPUs. */
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_AMD:
 		/* K5 Model 0 */
 		if ((cpu_id & 0xFF0) == 0x500)
 			return;
 		break;
 	case CPU_VENDOR_CENTAUR:
 		/*
 		 * http://www.centtech.com/c6_data_sheet.pdf
 		 *
 		 * I-12 RDTSC may return incoherent values in EDX:EAX
 		 * I-13 RDTSC hangs when certain event counters are used
 		 */
 		if ((cpu_id & 0xff0) == 0x540)
 			return;
 		break;
 	case CPU_VENDOR_NSC:
 		/* Same family/model value, but only stepping 0. */
 		if ((cpu_id & 0xff0) == 0x540 &&
 		    (cpu_id & CPUID_STEPPING) == 0)
 			return;
 		break;
 	}
 #endif
 
 	probe_tsc_freq();
 
 	/*
 	 * Tell CPU accounting the clock rate found at boot.  A cpufreq
 	 * driver loaded later may report a different maximum, in which
 	 * case tsc_levels_changed() updates it.
 	 */
 	if (tsc_freq != 0)
 		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
 
 	if (!tsc_is_invariant) {
 		/* Register to find out about changes in CPU frequency. */
 		tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
 		    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
 		tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
 		    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
 		tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
 		    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
 	}
 }
 
 #ifdef SMP
 
 /*
  * RDTSC is not a serializing instruction, and does not drain
  * instruction stream, so we need to drain the stream before executing
  * it.  It could be fixed by use of RDTSCP, except the instruction is
  * not available everywhere.
  *
  * Use CPUID for draining in the boot-time SMP consistency test.  The
  * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
  * and VIA) when SSE2 is present, and nothing on older machines which
  * also do not issue RDTSC prematurely.  There, testing for SSE2 and
  * vendor is too cumbersome, and we learn about TSC presence from CPUID.
  *
  * Do not use do_cpuid(), since we do not need CPUID results, which
  * have to be written into memory with do_cpuid().
  */
 /*
  * TSC_READ(x) defines tsc_read_<x>(), which executes CPUID and then
  * stores rdtsc() into slot x of the calling CPU's three-entry group in
  * the uint64_t array passed as 'arg' (index cpuid * 3 + x).  The three
  * instances are handed together to smp_rendezvous() by test_tsc().
  */
 #define	TSC_READ(x)							\
 static void								\
 tsc_read_##x(void *arg)							\
 {									\
 	uint64_t *tsc = arg;						\
 	u_int cpu = PCPU_GET(cpuid);					\
 									\
 	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");	\
 	tsc[cpu * 3 + x] = rdtsc();					\
 }
 TSC_READ(0)
 TSC_READ(1)
 TSC_READ(2)
 #undef TSC_READ
 
 /*
  * Number of sampling rounds recorded by test_tsc(); each round occupies
  * (mp_maxid + 1) * 3 entries of the sample array.
  */
 #define	N	1000
 
 static void
 comp_smp_tsc(void *arg)
 {
 	uint64_t *tsc;
 	int64_t d1, d2;
 	u_int cpu = PCPU_GET(cpuid);
 	u_int i, j, size;
 
 	size = (mp_maxid + 1) * 3;
 	for (i = 0, tsc = arg; i < N; i++, tsc += size)
 		CPU_FOREACH(j) {
 			if (j == cpu)
 				continue;
 			d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
 			d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
 			if (d1 <= 0 || d2 <= 0) {
 				smp_tsc = 0;
 				return;
 			}
 		}
 }
 
 /*
  * Rendezvous callback that adjusts the calling CPU's TSC using the
  * samples in 'arg' (N rounds, three reads per CPU), with the first CPU
  * (CPU_FIRST()) as the reference.  The first CPU itself does nothing.
  *
  * Despite the names, 'min' ends up as the LARGEST of the differences
  * first[k] - cpu[k + 1], and 'max' as the SMALLEST of the differences
  * first[k + 1] - cpu[k], over all rounds.  If min > max no adjustment
  * is made; otherwise the midpoint of the two is added to MSR 0x10
  * (presumably the TSC MSR, given what this function is for).
  */
 static void
 adj_smp_tsc(void *arg)
 {
 	uint64_t *tsc;
 	int64_t d, min, max;
 	u_int cpu = PCPU_GET(cpuid);
 	u_int first, i, size;
 
 	first = CPU_FIRST();
 	if (cpu == first)
 		return;
 	min = INT64_MIN;
 	max = INT64_MAX;
 	size = (mp_maxid + 1) * 3;
 	for (i = 0, tsc = arg; i < N; i++, tsc += size) {
 		d = tsc[first * 3] - tsc[cpu * 3 + 1];
 		if (d > min)
 			min = d;
 		d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
 		if (d > min)
 			min = d;
 		d = tsc[first * 3 + 1] - tsc[cpu * 3];
 		if (d < max)
 			max = d;
 		d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
 		if (d < max)
 			max = d;
 	}
 	if (min > max)
 		return;
 	/* Halve each term before adding so the sum cannot overflow. */
 	d = min / 2 + max / 2;
 	/*
 	 * Read MSR 0x10 into EDX:EAX, add the 64-bit delta passed in
 	 * ESI:EDI (add low halves, then add-with-carry the high halves),
 	 * and write the result back.
 	 */
 	__asm __volatile (
 		"movl $0x10, %%ecx\n\t"
 		"rdmsr\n\t"
 		"addl %%edi, %%eax\n\t"
 		"adcl %%esi, %%edx\n\t"
 		"wrmsr\n"
 		: /* No output */
 		: "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
 		: "ax", "cx", "dx", "cc"
 	);
 }
 
 /*
  * Run the SMP TSC synchronization test and return a timecounter quality.
  *
  * adj_max_count is the maximum number of times a failed test may be
  * followed by an adj_smp_tsc() pass and a retry.
  *
  * Returns -100 without testing when the TSC is neither declared SMP-safe
  * (smp_tsc) nor invariant, or when running as a guest.  After testing:
  *   1000  test passed, TSC invariant, and the CPU is Intel, or is
  *         AMD/Hygon and passes the family/CMP/core-count check below;
  *    800  test passed and TSC invariant, any other case;
  *   -100  test failed or TSC not invariant.
  * As a side effect smp_tsc is left holding the outcome of the test.
  */
 static int
 test_tsc(int adj_max_count)
 {
 	uint64_t *data, *tsc;
 	u_int i, size, adj;
 
 	if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
 		return (-100);
 	/* Three samples per CPU slot per round, N rounds. */
 	size = (mp_maxid + 1) * 3;
 	data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
 	adj = 0;
 retry:
 	for (i = 0, tsc = data; i < N; i++, tsc += size)
 		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
 	/* Assume success; comp_smp_tsc() clears the flag on a violation. */
 	smp_tsc = 1;	/* XXX */
 	smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
 	    smp_no_rendezvous_barrier, data);
 	if (!smp_tsc && adj < adj_max_count) {
 		adj++;
 		smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
 		    smp_no_rendezvous_barrier, data);
 		goto retry;
 	}
 	free(data, M_TEMP);
 	if (bootverbose)
 		printf("SMP: %sed TSC synchronization test%s\n",
 		    smp_tsc ? "pass" : "fail", 
 		    adj > 0 ? " after adjustment" : "");
 	if (smp_tsc && tsc_is_invariant) {
 		switch (cpu_vendor_id) {
 		case CPU_VENDOR_AMD:
+		case CPU_VENDOR_HYGON:
 			/*
 			 * Starting with Family 15h processors, TSC clock
 			 * source is in the north bridge.  Check whether
 			 * we have a single-socket/multi-core platform.
 			 * XXX Need more work for complex cases.
 			 */
 			if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
 			    (amd_feature2 & AMDID2_CMP) == 0 ||
 			    smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
 				break;
 			return (1000);
 		case CPU_VENDOR_INTEL:
 			/*
 			 * XXX Assume Intel platforms have synchronized TSCs.
 			 */
 			return (1000);
 		}
 		return (800);
 	}
 	return (-100);
 }
 
 #undef N
 
 #endif /* SMP */
 
 /*
  * Register the TSC as a timecounter (run via the SYSINIT that follows).
  *
  * Chooses tc_quality (-1000 under APM, the test_tsc() result on SMP,
  * 1000 for an invariant TSC on UP, otherwise the initializer's value),
  * picks the smallest shift (at most 32) that brings tsc_freq down to
  * max_freq, installs the matching read handler, and calls tc_init()
  * provided tsc_freq is known.  Returns early when the CPU has no TSC or
  * the TSC is disabled.
  */
 static void
 init_TSC_tc(void)
 {
 	uint64_t max_freq;
 	int shift;
 
 	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
 		return;
 
 	/*
 	 * Limit timecounter frequency to fit in an int and prevent it from
 	 * overflowing too fast.
 	 */
 	max_freq = UINT_MAX;
 
 	/*
 	 * We can not use the TSC if we support APM.  Precise timekeeping
 	 * on an APM'ed machine is at best a fools pursuit, since
 	 * any and all of the time spent in various SMM code can't
 	 * be reliably accounted for.  Reading the RTC is your only
 	 * source of reliable time info.  The i8254 loses too, of course,
 	 * but we need to have some kind of time...
 	 * We don't know at this point whether APM is going to be used
 	 * or not, nor when it might be activated.  Play it safe.
 	 */
 	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
 		tsc_timecounter.tc_quality = -1000;
 		if (bootverbose)
 			printf("TSC timecounter disabled: APM enabled.\n");
 		/* Note: this path skips the tsc_shift adjustment below. */
 		goto init;
 	}
 
 	/*
 	 * Intel CPUs without a C-state invariant TSC can stop the TSC
 	 * in either C2 or C3.  Disable use of C2 and C3 while using
 	 * the TSC as the timecounter.  The timecounter can be changed
 	 * to enable C2 and C3.
 	 *
 	 * Note that the TSC is used as the cputicker for computing
 	 * thread runtime regardless of the timecounter setting, so
 	 * using an alternate timecounter and enabling C2 or C3 can
 	 * result incorrect runtimes for kernel idle threads (but not
 	 * for any non-idle threads).
 	 */
 	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
 	    (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
 		tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
 		if (bootverbose)
 			printf("TSC timecounter disables C2 and C3.\n");
 	}
 
 	/*
 	 * We can not use the TSC in SMP mode unless the TSCs on all CPUs
 	 * are synchronized.  If the user is sure that the system has
 	 * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a
 	 * non-zero value.  The TSC seems unreliable in virtualized SMP
 	 * environments, so it is set to a negative quality in those cases.
 	 */
 #ifdef SMP
 	if (mp_ncpus > 1)
 		tsc_timecounter.tc_quality = test_tsc(smp_tsc_adjust);
 	else
 #endif /* SMP */
 	if (tsc_is_invariant)
 		tsc_timecounter.tc_quality = 1000;
 	max_freq >>= tsc_shift;
 
 init:
 	/* Find the smallest shift that brings tsc_freq within max_freq. */
 	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
 		;
 	/* Fenced readers are only used with SSE2 and more than one CPU. */
 	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
-		if (cpu_vendor_id == CPU_VENDOR_AMD) {
+		if (cpu_vendor_id == CPU_VENDOR_AMD ||
+		    cpu_vendor_id == CPU_VENDOR_HYGON) {
 			tsc_timecounter.tc_get_timecount = shift > 0 ?
 			    tsc_get_timecount_low_mfence :
 			    tsc_get_timecount_mfence;
 		} else {
 			tsc_timecounter.tc_get_timecount = shift > 0 ?
 			    tsc_get_timecount_low_lfence :
 			    tsc_get_timecount_lfence;
 		}
 	} else {
 		tsc_timecounter.tc_get_timecount = shift > 0 ?
 		    tsc_get_timecount_low : tsc_get_timecount;
 	}
 	if (shift > 0) {
 		tsc_timecounter.tc_name = "TSC-low";
 		if (bootverbose)
 			printf("TSC timecounter discards lower %d bit(s)\n",
 			    shift);
 	}
 	if (tsc_freq != 0) {
 		tsc_timecounter.tc_frequency = tsc_freq >> shift;
 		/* The shift count travels to the readers through tc_priv. */
 		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
 		tc_init(&tsc_timecounter);
 	}
 }
 SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
 
 /*
  * Re-run the SMP TSC synchronization test (test_tsc()) and update the
  * TSC timecounter quality if the result differs from the current one.
  * Compiles to an empty function on kernels built without SMP.
  */
 void
 resume_TSC(void)
 {
 #ifdef SMP
 	int old_quality, new_quality;
 
 	/*
 	 * A TSC that was not good on boot is unlikely to become good now,
 	 * and there is nothing to synchronize on a UP system.
 	 */
 	old_quality = tsc_timecounter.tc_quality;
 	if (old_quality < 0 || mp_ncpus < 2)
 		return;
 
 	/*
 	 * One synchronization pass should suffice for a TSC that was good,
 	 * but a larger smp_tsc_adjust is honoured.
 	 */
 	new_quality = test_tsc(MAX(smp_tsc_adjust, 1));
 	if (new_quality == old_quality)
 		return;
 	printf("TSC timecounter quality changed: %d -> %d\n",
 	    old_quality, new_quality);
 	tsc_timecounter.tc_quality = new_quality;
 #endif /* SMP */
 }
 
 /*
  * When cpufreq levels change, find out about the (new) max frequency.  We
  * use this to update CPU accounting in case it got a lower estimate at boot.
  */
 static void
 tsc_levels_changed(void *arg, int unit)
 {
 	device_t cf_dev;
 	struct cf_level *levels;
 	int count, error;
 	uint64_t max_freq;
 
 	/* Only use values from the first CPU, assuming all are equal. */
 	if (unit != 0)
 		return;
 
 	/* Find the appropriate cpufreq device instance. */
 	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
 	if (cf_dev == NULL) {
 		printf("tsc_levels_changed() called but no cpufreq device?\n");
 		return;
 	}
 
 	/* Get settings from the device and find the max frequency. */
 	count = 64;
 	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
 	if (levels == NULL)
 		return;
 	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
 	if (error == 0 && count != 0) {
 		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
 		set_cputicker(rdtsc, max_freq, 1);
 	} else
 		printf("tsc_levels_changed: no max freq found\n");
 	free(levels, M_TEMP);
 }
 
 /*
  * cpufreq_pre_change handler.  Vetoes the pending frequency change by
  * setting *status to EBUSY while the TSC is the active timecounter; a
  * dynamically-changing timecounter rate is not handled at present.
  * Leaves *status alone if it already holds an error.  'arg' and 'level'
  * are unused.
  */
 static void
 tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
 {
 
 	if (*status == 0 && timecounter == &tsc_timecounter) {
 		printf("timecounter TSC must not be in use when "
 		    "changing frequencies; change denied\n");
 		*status = EBUSY;
 	}
 }
 
 /*
  * cpufreq_post_change handler.  Records the frequency of the level that
  * was just entered in tsc_freq and in the timecounter.  Nothing is done
  * when the TSC is disabled or the transition reported an error.  'arg'
  * is unused.
  */
 static void
 tsc_freq_changed(void *arg, const struct cf_level *level, int status)
 {
 	uint64_t hz;
 	int shift;
 
 	if (status != 0 || tsc_disabled)
 		return;
 
 	/* total_set.freq is in MHz. */
 	hz = (uint64_t)level->total_set.freq * 1000000;
 	atomic_store_rel_64(&tsc_freq, hz);
 
 	/* The timecounter frequency is tsc_freq reduced by the tc shift. */
 	shift = (int)(intptr_t)tsc_timecounter.tc_priv;
 	tsc_timecounter.tc_frequency = hz >> shift;
 }
 
 static int
 sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	uint64_t freq;
 
 	freq = atomic_load_acq_64(&tsc_freq);
 	if (freq == 0)
 		return (EOPNOTSUPP);
 	error = sysctl_handle_64(oidp, &freq, 0, req);
 	if (error == 0 && req->newptr != NULL) {
 		atomic_store_rel_64(&tsc_freq, freq);
 		atomic_store_rel_64(&tsc_timecounter.tc_frequency,
 		    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
     0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
 
 /* Timecounter read handler: returns rdtsc32() with no fence and no shift. */
 static u_int
 tsc_get_timecount(struct timecounter *tc __unused)
 {
 	u_int count;
 
 	count = rdtsc32();
 	return (count);
 }
 
 /*
  * Timecounter read handler used when a non-zero shift is in effect.
  * Executes RDTSC and then SHRD, which shifts EAX right by the count in
  * CL while filling the vacated high bits from EDX.  The count is the
  * shift stored in tc->tc_priv by init_TSC_tc().  Returns the resulting
  * EAX; EDX is declared clobbered.
  */
 static inline u_int
 tsc_get_timecount_low(struct timecounter *tc)
 {
 	uint32_t rv;
 
 	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
 	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
 	return (rv);
 }
 
 /* Timecounter read handler: lfence() followed by rdtsc32(). */
 static u_int
 tsc_get_timecount_lfence(struct timecounter *tc __unused)
 {
 	u_int count;
 
 	lfence();
 	count = rdtsc32();
 	return (count);
 }
 
 static u_int
 tsc_get_timecount_low_lfence(struct timecounter *tc)
 {
 
 	lfence();
 	return (tsc_get_timecount_low(tc));
 }
 
 /* Timecounter read handler: mfence() followed by rdtsc32(). */
 static u_int
 tsc_get_timecount_mfence(struct timecounter *tc __unused)
 {
 	u_int count;
 
 	mfence();
 	count = rdtsc32();
 	return (count);
 }
 
 static u_int
 tsc_get_timecount_low_mfence(struct timecounter *tc)
 {
 
 	mfence();
 	return (tsc_get_timecount_low(tc));
 }
 
 /*
  * tc_fill_vdso_timehands callback for the TSC timecounter.  Marks the
  * vDSO timehands as using the x86 TSC algorithm, passes on the shift
  * kept in tc->tc_priv, sets the HPET index to 0xffffffff and zeroes the
  * th_res area.  Always returns 1.
  */
 static uint32_t
 x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
 {
 	int shift;
 
 	shift = (int)(intptr_t)tc->tc_priv;
 	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
 	vdso_th->th_x86_hpet_idx = 0xffffffff;
 	vdso_th->th_x86_shift = shift;
 	vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
 	return (1);
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * tc_fill_vdso_timehands32 callback for the TSC timecounter; fills a
  * struct vdso_timehands32 with the same values as
  * x86_tsc_vdso_timehands() (TSC algorithm, shift from tc->tc_priv, HPET
  * index 0xffffffff, th_res zeroed).  Always returns 1.
  */
 static uint32_t
 x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
     struct timecounter *tc)
 {
 	int shift;
 
 	shift = (int)(intptr_t)tc->tc_priv;
 	bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
 	vdso_th32->th_x86_hpet_idx = 0xffffffff;
 	vdso_th32->th_x86_shift = shift;
 	vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
 	return (1);
 }
 #endif