
part3_add_score_variable_to_cpu_group_struct_fbsd15c.patch

Authored By: koinec_yahoo.co.jp, Mar 24 2024, 11:39 AM
Size: 117 KB
Referenced Files: None
Subscribers: None


diff --git a/sys/conf/options b/sys/conf/options
index 555484360a2b..e625fdca214d 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -1,1024 +1,1025 @@
#
# On the handling of kernel options
#
# All kernel options should be listed in NOTES, with suitable
# descriptions. Negative options (options that make some code not
# compile) should be commented out; LINT (generated from NOTES) should
# compile as much code as possible. Try to structure option-using
# code so that a single option only switches code on, or only switches
# code off, to make it possible to have a full compile-test. If
# necessary, you can check for COMPILING_LINT to get maximum code
# coverage.
#
# All new options shall also be listed in either "conf/options" or
# "conf/options.<machine>". Options that affect a single source-file
# <xxx>.[c|s] should be directed into "opt_<xxx>.h", while options
# that affect multiple files should either go in "opt_global.h" if
# this is a kernel-wide option (used just about everywhere), or in
# "opt_<option-name-in-lower-case>.h" if it affects only some files.
# Note that the effect of listing only an option without a
# header-file-name in conf/options (and cousins) is that the last
# convention is followed.
#
# This handling scheme is not yet fully implemented.
#
#
# Format of this file:
# Option name filename
#
# If filename is missing, the default is
# opt_<name-of-option-in-lower-case>.h
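# For context (commentary, not part of the patch): an option declared with a
# header name is consumed by the affected source file roughly as sketched
# below. FOO_DEBUG, opt_foo.h and FOO_DPRINTF are made-up names used only
# for illustration.
#
#   #include "opt_foo.h"        /* generated by config(8); defines FOO_DEBUG if enabled */
#
#   #ifdef FOO_DEBUG
#   #define FOO_DPRINTF(...)    printf("foo: " __VA_ARGS__)
#   #else
#   #define FOO_DPRINTF(...)    do { } while (0)
#   #endif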
AAC_DEBUG opt_aac.h
AACRAID_DEBUG opt_aacraid.h
AHC_ALLOW_MEMIO opt_aic7xxx.h
AHC_TMODE_ENABLE opt_aic7xxx.h
AHC_DUMP_EEPROM opt_aic7xxx.h
AHC_DEBUG opt_aic7xxx.h
AHC_DEBUG_OPTS opt_aic7xxx.h
AHC_REG_PRETTY_PRINT opt_aic7xxx.h
AHD_DEBUG opt_aic79xx.h
AHD_DEBUG_OPTS opt_aic79xx.h
AHD_TMODE_ENABLE opt_aic79xx.h
AHD_REG_PRETTY_PRINT opt_aic79xx.h
# Debugging options.
ALT_BREAK_TO_DEBUGGER opt_kdb.h
BREAK_TO_DEBUGGER opt_kdb.h
BUF_TRACKING opt_global.h
DDB
DDB_BUFR_SIZE opt_ddb.h
DDB_CAPTURE_DEFAULTBUFSIZE opt_ddb.h
DDB_CAPTURE_MAXBUFSIZE opt_ddb.h
DDB_CTF opt_ddb.h
DDB_NUMSYM opt_ddb.h
EARLY_PRINTF opt_global.h
FULL_BUF_TRACKING opt_global.h
GDB
KDB opt_global.h
KDB_TRACE opt_kdb.h
KDB_UNATTENDED opt_kdb.h
KLD_DEBUG opt_kld.h
NUM_CORE_FILES opt_global.h
QUEUE_MACRO_DEBUG_TRACE opt_global.h
QUEUE_MACRO_DEBUG_TRASH opt_global.h
SYSCTL_DEBUG opt_sysctl.h
TEXTDUMP_PREFERRED opt_ddb.h
TEXTDUMP_VERBOSE opt_ddb.h
TSLOG opt_global.h
TSLOG_PAGEZERO opt_global.h
TSLOGSIZE opt_global.h
# Miscellaneous options.
ALQ
ALTERA_SDCARD_FAST_SIM opt_altera_sdcard.h
ATSE_CFI_HACK opt_cfi.h
AUDIT opt_global.h
BOOTHOWTO opt_global.h
BOOTVERBOSE opt_global.h
CALLOUT_PROFILING
CAPABILITIES opt_capsicum.h
CAPABILITY_MODE opt_capsicum.h
CC_CDG opt_global.h
CC_CHD opt_global.h
CC_CUBIC opt_global.h
CC_DEFAULT opt_cc.h
CC_DCTCP opt_global.h
CC_HD opt_global.h
CC_HTCP opt_global.h
CC_NEWRENO opt_global.h
CC_VEGAS opt_global.h
COMPAT_43 opt_global.h
COMPAT_43TTY opt_global.h
COMPAT_FREEBSD4 opt_global.h
COMPAT_FREEBSD5 opt_global.h
COMPAT_FREEBSD6 opt_global.h
COMPAT_FREEBSD7 opt_global.h
COMPAT_FREEBSD9 opt_global.h
COMPAT_FREEBSD10 opt_global.h
COMPAT_FREEBSD11 opt_global.h
COMPAT_FREEBSD12 opt_global.h
COMPAT_FREEBSD13 opt_global.h
COMPAT_FREEBSD14 opt_global.h
COMPAT_LINUXKPI opt_dontuse.h
COMPILING_LINT opt_global.h
CY_PCI_FASTINTR
DEADLKRES opt_watchdog.h
EXPERIMENTAL opt_global.h
DIRECTIO
FILEMON opt_dontuse.h
FFCLOCK
FULL_PREEMPTION opt_sched.h
GZIO opt_gzio.h
IMGACT_BINMISC opt_dontuse.h
IPI_PREEMPTION opt_sched.h
GEOM_BDE opt_geom.h
GEOM_CACHE opt_geom.h
GEOM_CONCAT opt_geom.h
GEOM_ELI opt_geom.h
GEOM_GATE opt_geom.h
GEOM_JOURNAL opt_geom.h
GEOM_LABEL opt_geom.h
GEOM_LABEL_GPT opt_geom.h
GEOM_LINUX_LVM opt_geom.h
GEOM_MAP opt_geom.h
GEOM_MIRROR opt_geom.h
GEOM_MOUNTVER opt_geom.h
GEOM_MULTIPATH opt_geom.h
GEOM_NOP opt_geom.h
GEOM_PART_APM opt_geom.h
GEOM_PART_BSD opt_geom.h
GEOM_PART_BSD64 opt_geom.h
GEOM_PART_EBR opt_geom.h
GEOM_PART_GPT opt_geom.h
GEOM_PART_LDM opt_geom.h
GEOM_PART_MBR opt_geom.h
GEOM_RAID opt_geom.h
GEOM_RAID3 opt_geom.h
GEOM_SHSEC opt_geom.h
GEOM_STRIPE opt_geom.h
GEOM_UZIP opt_geom.h
GEOM_UZIP_DEBUG opt_geom.h
GEOM_VINUM opt_geom.h
GEOM_VIRSTOR opt_geom.h
GEOM_ZERO opt_geom.h
IFLIB opt_iflib.h
KDTRACE_HOOKS opt_global.h
KDTRACE_FRAME opt_kdtrace.h
KN_HASHSIZE opt_kqueue.h
KSTACK_MAX_PAGES
KSTACK_PAGES
KSTACK_USAGE_PROF
KTRACE
KTRACE_REQUEST_POOL opt_ktrace.h
LIBICONV
MAC opt_global.h
MAC_BIBA opt_dontuse.h
MAC_BSDEXTENDED opt_dontuse.h
MAC_DDB opt_dontuse.h
MAC_IFOFF opt_dontuse.h
MAC_IPACL opt_dontuse.h
MAC_LOMAC opt_dontuse.h
MAC_MLS opt_dontuse.h
MAC_NONE opt_dontuse.h
MAC_NTPD opt_dontuse.h
MAC_PARTITION opt_dontuse.h
MAC_PORTACL opt_dontuse.h
MAC_PRIORITY opt_dontuse.h
MAC_SEEOTHERUIDS opt_dontuse.h
MAC_STATIC opt_mac.h
MAC_STUB opt_dontuse.h
MAC_TEST opt_dontuse.h
MAC_GRANTBYLABEL opt_dontuse.h
MAC_VERIEXEC opt_dontuse.h
MAC_VERIEXEC_SHA1 opt_dontuse.h
MAC_VERIEXEC_SHA256 opt_dontuse.h
MAC_VERIEXEC_SHA384 opt_dontuse.h
MAC_VERIEXEC_SHA512 opt_dontuse.h
MD_ROOT opt_md.h
MD_ROOT_FSTYPE opt_md.h
MD_ROOT_READONLY opt_md.h
MD_ROOT_SIZE opt_md.h
MD_ROOT_MEM opt_md.h
MFI_DEBUG opt_mfi.h
MFI_DECODE_LOG opt_mfi.h
MPROF_BUFFERS opt_mprof.h
MPROF_HASH_SIZE opt_mprof.h
NEW_PCIB opt_global.h
NO_ADAPTIVE_MUTEXES opt_adaptive_mutexes.h
NO_ADAPTIVE_RWLOCKS
NO_ADAPTIVE_SX
NO_OBSOLETE_CODE opt_global.h
NO_SYSCTL_DESCR opt_global.h
NSWBUF_MIN opt_param.h
MBUF_PACKET_ZONE_DISABLE opt_global.h
PANIC_REBOOT_WAIT_TIME opt_panic.h
PCI_HP opt_pci.h
PCI_IOV opt_global.h
PPC_DEBUG opt_ppc.h
PPC_PROBE_CHIPSET opt_ppc.h
PPS_SYNC opt_ntp.h
PREEMPTION opt_sched.h
QUOTA
SCHED_4BSD opt_sched.h
SCHED_STATS opt_sched.h
SCHED_ULE opt_sched.h
SLEEPQUEUE_PROFILING
SLHCI_DEBUG opt_slhci.h
STACK opt_stack.h
SUIDDIR
MSGMNB opt_sysvipc.h
MSGMNI opt_sysvipc.h
MSGSEG opt_sysvipc.h
MSGSSZ opt_sysvipc.h
MSGTQL opt_sysvipc.h
SEMMNI opt_sysvipc.h
SEMMNS opt_sysvipc.h
SEMMNU opt_sysvipc.h
SEMMSL opt_sysvipc.h
SEMOPM opt_sysvipc.h
SEMUME opt_sysvipc.h
SHMALL opt_sysvipc.h
SHMMAX opt_sysvipc.h
SHMMAXPGS opt_sysvipc.h
SHMMIN opt_sysvipc.h
SHMMNI opt_sysvipc.h
SHMSEG opt_sysvipc.h
SYSVMSG opt_sysvipc.h
SYSVSEM opt_sysvipc.h
SYSVSHM opt_sysvipc.h
SW_WATCHDOG opt_watchdog.h
TCPHPTS
TCP_REQUEST_TRK opt_global.h
TCP_ACCOUNTING opt_global.h
TCP_BBR opt_inet.h
TCP_RACK opt_inet.h
#
# TCP SaD Detection is an experimental Sack attack Detection (SaD)
# algorithm that uses "normal" behaviour with SACK's to detect
# a possible attack. It is strictly experimental at this point.
#
TCP_SAD_DETECTION opt_inet.h
TURNSTILE_PROFILING
UMTX_PROFILING
UMTX_CHAINS opt_global.h
VERBOSE_SYSINIT
ZSTDIO opt_zstdio.h
# Sanitizers
COVERAGE opt_global.h
KASAN opt_global.h
KCOV
KCSAN opt_global.h
KMSAN opt_global.h
KUBSAN opt_global.h
# POSIX kernel options
P1003_1B_MQUEUE opt_posix.h
P1003_1B_SEMAPHORES opt_posix.h
_KPOSIX_PRIORITY_SCHEDULING opt_posix.h
# Do we want the config file compiled into the kernel?
INCLUDE_CONFIG_FILE opt_config.h
# Options for static filesystems. These should only be used at config
# time, since the corresponding lkms cannot work if there are any static
# dependencies. Unusability is enforced by hiding the defines for the
# options in a never-included header.
AUTOFS opt_dontuse.h
CD9660 opt_dontuse.h
EXT2FS opt_dontuse.h
FDESCFS opt_dontuse.h
FFS opt_dontuse.h
FUSEFS opt_dontuse.h
MSDOSFS opt_dontuse.h
NULLFS opt_dontuse.h
PROCFS opt_dontuse.h
PSEUDOFS opt_dontuse.h
SMBFS opt_dontuse.h
TARFS opt_dontuse.h
TMPFS opt_dontuse.h
UDF opt_dontuse.h
UNIONFS opt_dontuse.h
ZFS opt_dontuse.h
# Pseudofs debugging
PSEUDOFS_TRACE opt_pseudofs.h
# Tarfs debugging
TARFS_DEBUG opt_tarfs.h
# In-kernel GSS-API
KGSSAPI opt_kgssapi.h
KGSSAPI_DEBUG opt_kgssapi.h
# These static filesystems have one slightly bogus static dependency in
# sys/i386/i386/autoconf.c. If any of these filesystems are
# statically compiled into the kernel, code for mounting them as root
# filesystems will be enabled - but look below.
# NFSCL - client
# NFSD - server
NFSCL opt_nfs.h
NFSD opt_nfs.h
# filesystems and libiconv bridge
CD9660_ICONV opt_dontuse.h
MSDOSFS_ICONV opt_dontuse.h
UDF_ICONV opt_dontuse.h
# If you are following the conditions in the copyright,
# you can enable soft-updates which will speed up a lot of things
# and make the system safer from crashes at the same time.
# Otherwise a STUB module will be compiled in.
SOFTUPDATES opt_ffs.h
# On small, embedded systems, it can be useful to turn off support for
# snapshots. It saves about 30-40k for a feature that would be lightly
# used, if it is used at all.
NO_FFS_SNAPSHOT opt_ffs.h
# Enabling this option turns on support for Access Control Lists in UFS,
# which can be used to support high security configurations. Depends on
# UFS_EXTATTR.
UFS_ACL opt_ufs.h
# Enabling this option turns on support for extended attributes in UFS-based
# filesystems, which can be used to support high security configurations
# as well as new filesystem features.
UFS_EXTATTR opt_ufs.h
UFS_EXTATTR_AUTOSTART opt_ufs.h
# Enable fast hash lookups for large directories on UFS-based filesystems.
UFS_DIRHASH opt_ufs.h
# Enable gjournal-based UFS journal.
UFS_GJOURNAL opt_ufs.h
# We plan to remove the static dependencies above, with a
# <filesystem>_ROOT option to control if it is usable as root. This list
# allows these options to be present in config files already (though
# they won't make any difference yet).
NFS_ROOT opt_nfsroot.h
# SMB/CIFS requester
NETSMB opt_netsmb.h
# Enable debugnet(4) networking support.
DEBUGNET opt_global.h
# Enable netdump(4) client support.
NETDUMP opt_global.h
# Enable netgdb(4) support.
NETGDB opt_global.h
# Options used only in subr_param.c.
HZ opt_param.h
MAXFILES opt_param.h
NBUF opt_param.h
NSFBUFS opt_param.h
VM_BCACHE_SIZE_MAX opt_param.h
VM_SWZONE_SIZE_MAX opt_param.h
MAXUSERS
DFLDSIZ opt_param.h
MAXDSIZ opt_param.h
MAXSSIZ opt_param.h
# Generic SCSI options.
CAM_MAX_HIGHPOWER opt_cam.h
CAMDEBUG opt_cam.h
CAM_DEBUG_COMPILE opt_cam.h
CAM_DEBUG_DELAY opt_cam.h
CAM_DEBUG_BUS opt_cam.h
CAM_DEBUG_TARGET opt_cam.h
CAM_DEBUG_LUN opt_cam.h
CAM_DEBUG_FLAGS opt_cam.h
CAM_BOOT_DELAY opt_cam.h
CAM_IOSCHED_DYNAMIC opt_cam.h
CAM_IO_STATS opt_cam.h
CAM_TEST_FAILURE opt_cam.h
SCSI_DELAY opt_scsi.h
SCSI_NO_SENSE_STRINGS opt_scsi.h
SCSI_NO_OP_STRINGS opt_scsi.h
# Options used only in cam/ata/ata_da.c
ATA_STATIC_ID opt_ada.h
# Options used only in cam/scsi/scsi_cd.c
CHANGER_MIN_BUSY_SECONDS opt_cd.h
CHANGER_MAX_BUSY_SECONDS opt_cd.h
# Options used only in cam/scsi/scsi_da.c
DA_TRACK_REFS opt_da.h
# Options used only in cam/scsi/scsi_sa.c.
SA_IO_TIMEOUT opt_sa.h
SA_SPACE_TIMEOUT opt_sa.h
SA_REWIND_TIMEOUT opt_sa.h
SA_ERASE_TIMEOUT opt_sa.h
SA_1FM_AT_EOD opt_sa.h
# Options used only in cam/scsi/scsi_pt.c
SCSI_PT_DEFAULT_TIMEOUT opt_pt.h
# Options used only in cam/scsi/scsi_ses.c
SES_ENABLE_PASSTHROUGH opt_ses.h
# Options used in dev/sym/ (Symbios SCSI driver).
SYM_SETUP_SCSI_DIFF opt_sym.h #-HVD support for 825a, 875, 885
# disabled:0 (default), enabled:1
SYM_SETUP_PCI_PARITY opt_sym.h #-PCI parity checking
# disabled:0, enabled:1 (default)
SYM_SETUP_MAX_LUN opt_sym.h #-Number of LUNs supported
# default:8, range:[1..64]
# Options used only in dev/isp/*
ISP_TARGET_MODE opt_isp.h
ISP_FW_CRASH_DUMP opt_isp.h
ISP_DEFAULT_ROLES opt_isp.h
ISP_INTERNAL_TARGET opt_isp.h
ISP_FCTAPE_OFF opt_isp.h
# Options used only in dev/iscsi
ISCSI_INITIATOR_DEBUG opt_iscsi_initiator.h
# Net stuff.
ACCEPT_FILTER_DATA
ACCEPT_FILTER_DNS
ACCEPT_FILTER_HTTP
ALTQ opt_global.h
ALTQ_CBQ opt_altq.h
ALTQ_CDNR opt_altq.h
ALTQ_CODEL opt_altq.h
ALTQ_DEBUG opt_altq.h
ALTQ_HFSC opt_altq.h
ALTQ_FAIRQ opt_altq.h
ALTQ_NOPCC opt_altq.h
ALTQ_PRIQ opt_altq.h
ALTQ_RED opt_altq.h
ALTQ_RIO opt_altq.h
BOOTP opt_bootp.h
BOOTP_BLOCKSIZE opt_bootp.h
BOOTP_COMPAT opt_bootp.h
BOOTP_NFSROOT opt_bootp.h
BOOTP_NFSV3 opt_bootp.h
BOOTP_WIRED_TO opt_bootp.h
DEVICE_POLLING
DUMMYNET opt_ipdn.h
RATELIMIT opt_ratelimit.h
RATELIMIT_DEBUG opt_ratelimit.h
INET opt_inet.h
INET6 opt_inet6.h
STATS opt_global.h
IPDIVERT
IPFILTER opt_ipfilter.h
IPFILTER_DEFAULT_BLOCK opt_ipfilter.h
IPFILTER_LOG opt_ipfilter.h
IPFILTER_LOOKUP opt_ipfilter.h
IPFIREWALL opt_ipfw.h
IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h
IPFIREWALL_NAT opt_ipfw.h
IPFIREWALL_NAT64 opt_ipfw.h
IPFIREWALL_NPTV6 opt_ipfw.h
IPFIREWALL_VERBOSE opt_ipfw.h
IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h
IPFIREWALL_PMOD opt_ipfw.h
IPSEC opt_ipsec.h
IPSEC_DEBUG opt_ipsec.h
IPSEC_SUPPORT opt_ipsec.h
IPSTEALTH
KERN_TLS
KRPC
LIBALIAS
LIBMCHAIN
MBUF_PROFILING
MBUF_STRESS_TEST
MROUTING opt_mrouting.h
NFSLOCKD
NETLINK opt_global.h
PF_DEFAULT_TO_DROP opt_pf.h
ROUTE_MPATH opt_route.h
ROUTETABLES opt_route.h
FIB_ALGO opt_route.h
RSS opt_rss.h
SLIP_IFF_OPTS opt_slip.h
TCPPCAP opt_global.h
SIFTR
TCP_BLACKBOX opt_global.h
TCP_HHOOK opt_global.h
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
TCP_RFC7413 opt_inet.h
TCP_RFC7413_MAX_KEYS opt_inet.h
TCP_RFC7413_MAX_PSKS opt_inet.h
TCP_SIGNATURE opt_ipsec.h
VLAN_ARRAY opt_vlan.h
XDR
XBONEHACK
#
# SCTP
#
SCTP opt_sctp.h
SCTP_SUPPORT opt_sctp.h
SCTP_DEBUG opt_sctp.h # Enable debug printfs
SCTP_LOCK_LOGGING opt_sctp.h # Log to KTR lock activity
SCTP_MBUF_LOGGING opt_sctp.h # Log to KTR general mbuf aloc/free
SCTP_MBCNT_LOGGING opt_sctp.h # Log to KTR mbcnt activity
SCTP_PACKET_LOGGING opt_sctp.h # Log to a packet buffer last N packets
SCTP_LTRACE_CHUNKS opt_sctp.h # Log to KTR chunks processed
SCTP_LTRACE_ERRORS opt_sctp.h # Log to KTR error returns.
SCTP_USE_PERCPU_STAT opt_sctp.h # Use per cpu stats.
SCTP_MCORE_INPUT opt_sctp.h # Have multiple input threads for input mbufs
SCTP_LOCAL_TRACE_BUF opt_sctp.h # Use tracebuffer exported via sysctl
SCTP_DETAILED_STR_STATS opt_sctp.h # Use per PR-SCTP policy stream stats
#
#
#
# Netgraph(4). Use option NETGRAPH to enable the base netgraph code.
# Each netgraph node type can either be compiled into the kernel
# or loaded dynamically. To get the former, include the corresponding
# option below. Each type has its own man page, e.g. ng_async(4).
NETGRAPH
NETGRAPH_DEBUG opt_netgraph.h
NETGRAPH_ASYNC opt_netgraph.h
NETGRAPH_BLUETOOTH opt_netgraph.h
NETGRAPH_BLUETOOTH_BT3C opt_netgraph.h
NETGRAPH_BLUETOOTH_H4 opt_netgraph.h
NETGRAPH_BLUETOOTH_HCI opt_netgraph.h
NETGRAPH_BLUETOOTH_L2CAP opt_netgraph.h
NETGRAPH_BLUETOOTH_SOCKET opt_netgraph.h
NETGRAPH_BLUETOOTH_UBT opt_netgraph.h
NETGRAPH_BLUETOOTH_UBTBCMFW opt_netgraph.h
NETGRAPH_BPF opt_netgraph.h
NETGRAPH_BRIDGE opt_netgraph.h
NETGRAPH_CAR opt_netgraph.h
NETGRAPH_CHECKSUM opt_netgraph.h
NETGRAPH_CISCO opt_netgraph.h
NETGRAPH_DEFLATE opt_netgraph.h
NETGRAPH_DEVICE opt_netgraph.h
NETGRAPH_ECHO opt_netgraph.h
NETGRAPH_EIFACE opt_netgraph.h
NETGRAPH_ETHER opt_netgraph.h
NETGRAPH_ETHER_ECHO opt_netgraph.h
NETGRAPH_FEC opt_netgraph.h
NETGRAPH_FRAME_RELAY opt_netgraph.h
NETGRAPH_GIF opt_netgraph.h
NETGRAPH_GIF_DEMUX opt_netgraph.h
NETGRAPH_HOLE opt_netgraph.h
NETGRAPH_IFACE opt_netgraph.h
NETGRAPH_IP_INPUT opt_netgraph.h
NETGRAPH_IPFW opt_netgraph.h
NETGRAPH_KSOCKET opt_netgraph.h
NETGRAPH_L2TP opt_netgraph.h
NETGRAPH_LMI opt_netgraph.h
NETGRAPH_MPPC_COMPRESSION opt_netgraph.h
NETGRAPH_MPPC_ENCRYPTION opt_netgraph.h
NETGRAPH_NAT opt_netgraph.h
NETGRAPH_NETFLOW opt_netgraph.h
NETGRAPH_ONE2MANY opt_netgraph.h
NETGRAPH_PATCH opt_netgraph.h
NETGRAPH_PIPE opt_netgraph.h
NETGRAPH_PPP opt_netgraph.h
NETGRAPH_PPPOE opt_netgraph.h
NETGRAPH_PPTPGRE opt_netgraph.h
NETGRAPH_PRED1 opt_netgraph.h
NETGRAPH_RFC1490 opt_netgraph.h
NETGRAPH_SOCKET opt_netgraph.h
NETGRAPH_SPLIT opt_netgraph.h
NETGRAPH_SPPP opt_netgraph.h
NETGRAPH_TAG opt_netgraph.h
NETGRAPH_TCPMSS opt_netgraph.h
NETGRAPH_TEE opt_netgraph.h
NETGRAPH_TTY opt_netgraph.h
NETGRAPH_UI opt_netgraph.h
NETGRAPH_VJC opt_netgraph.h
NETGRAPH_VLAN opt_netgraph.h
# DRM options
DRM_DEBUG opt_drm.h
TI_SF_BUF_JUMBO opt_ti.h
TI_JUMBO_HDRSPLIT opt_ti.h
# Misc debug flags. Most of these should probably be replaced with
# 'DEBUG', and then let people recompile just the interesting modules
# with 'make CC="cc -DDEBUG"'.
DEBUG_1284 opt_ppb_1284.h
LPT_DEBUG opt_lpt.h
PLIP_DEBUG opt_plip.h
LOCKF_DEBUG opt_debug_lockf.h
SI_DEBUG opt_debug_si.h
IFMEDIA_DEBUG opt_ifmedia.h
# Fb options
FB_DEBUG opt_fb.h
# ppbus related options
PERIPH_1284 opt_ppb_1284.h
DONTPROBE_1284 opt_ppb_1284.h
# smbus related options
ENABLE_ALART opt_intpm.h
# These cause changes all over the kernel
BLKDEV_IOSIZE opt_global.h
BURN_BRIDGES opt_global.h
DEBUG opt_global.h
DEBUG_LOCKS opt_global.h
DEBUG_VFS_LOCKS opt_global.h
DFLTPHYS opt_global.h
DIAGNOSTIC opt_global.h
INVARIANT_SUPPORT opt_global.h
INVARIANTS opt_global.h
KASSERT_PANIC_OPTIONAL opt_global.h
MAXCPU opt_global.h
MAXMEMDOM opt_global.h
MAXPHYS opt_maxphys.h
MCLSHIFT opt_global.h
MUTEX_NOINLINE opt_global.h
LOCK_PROFILING opt_global.h
MSIZE opt_global.h
REGRESSION opt_global.h
RWLOCK_NOINLINE opt_global.h
SX_NOINLINE opt_global.h
VFS_BIO_DEBUG opt_global.h
# These are VM related options
VM_KMEM_SIZE opt_vm.h
VM_KMEM_SIZE_SCALE opt_vm.h
VM_KMEM_SIZE_MAX opt_vm.h
VM_NRESERVLEVEL opt_vm.h
VM_LEVEL_0_ORDER opt_vm.h
NO_SWAPPING opt_vm.h
MALLOC_MAKE_FAILURES opt_vm.h
MALLOC_PROFILE opt_vm.h
MALLOC_DEBUG_MAXZONES opt_vm.h
# The MemGuard replacement allocator used for tamper-after-free detection
DEBUG_MEMGUARD opt_vm.h
# The RedZone malloc(9) protection
DEBUG_REDZONE opt_vm.h
# Standard SMP options
EARLY_AP_STARTUP opt_global.h
SMP opt_global.h
NUMA opt_global.h
+CPUGRP_SCORE opt_global.h
# Size of the kernel message buffer
MSGBUF_SIZE opt_msgbuf.h
# NFS options
NFS_MINATTRTIMO opt_nfs.h
NFS_MAXATTRTIMO opt_nfs.h
NFS_MINDIRATTRTIMO opt_nfs.h
NFS_MAXDIRATTRTIMO opt_nfs.h
NFS_DEBUG opt_nfs.h
# TMPFS options
TMPFS_PAGES_MINRESERVED opt_tmpfs.h
# Options for uart(4)
UART_PPS_ON_CTS opt_uart.h
UART_POLL_FREQ opt_uart.h
UART_DEV_TOLERANCE_PCT opt_uart.h
# options for bus/device framework
BUS_DEBUG opt_bus.h
# options for USB support
USB_DEBUG opt_usb.h
USB_HOST_ALIGN opt_usb.h
USB_REQ_DEBUG opt_usb.h
USB_TEMPLATE opt_usb.h
USB_VERBOSE opt_usb.h
USB_DMA_SINGLE_ALLOC opt_usb.h
USB_EHCI_BIG_ENDIAN_DESC opt_usb.h
U3G_DEBUG opt_u3g.h
UKBD_DFLT_KEYMAP opt_ukbd.h
UPLCOM_INTR_INTERVAL opt_uplcom.h
UVSCOM_DEFAULT_OPKTSIZE opt_uvscom.h
UVSCOM_INTR_INTERVAL opt_uvscom.h
# options for the Realtek rtwn driver
RTWN_DEBUG opt_rtwn.h
RTWN_WITHOUT_UCODE opt_rtwn.h
# Embedded system options
INIT_PATH
ROOTDEVNAME
FDC_DEBUG opt_fdc.h
PCFCLOCK_VERBOSE opt_pcfclock.h
PCFCLOCK_MAX_RETRIES opt_pcfclock.h
KTR opt_global.h
KTR_ALQ opt_ktr.h
KTR_MASK opt_ktr.h
KTR_CPUMASK opt_ktr.h
KTR_COMPILE opt_global.h
KTR_BOOT_ENTRIES opt_global.h
KTR_ENTRIES opt_global.h
KTR_VERBOSE opt_ktr.h
WITNESS opt_global.h
WITNESS_KDB opt_witness.h
WITNESS_NO_VNODE opt_witness.h
WITNESS_SKIPSPIN opt_witness.h
WITNESS_COUNT opt_witness.h
OPENSOLARIS_WITNESS opt_global.h
EPOCH_TRACE opt_global.h
# options for ACPI support
ACPI_DEBUG opt_acpi.h
ACPI_MAX_TASKS opt_acpi.h
ACPI_MAX_THREADS opt_acpi.h
DEV_ACPI opt_acpi.h
ACPI_EARLY_EPYC_WAR opt_acpi.h
# options for IOMMU support
IOMMU opt_iommu.h
# ISA support
DEV_ISA opt_isa.h
ISAPNP opt_dontuse.h
# various 'device presence' options.
DEV_BPF opt_bpf.h
DEV_CARP opt_carp.h
DEV_NETMAP opt_global.h
DEV_PCI opt_pci.h
DEV_PF opt_pf.h
DEV_PFLOG opt_pf.h
DEV_PFSYNC opt_pf.h
DEV_SPLASH opt_splash.h
DEV_VLAN opt_vlan.h
# bce driver
BCE_DEBUG opt_bce.h
BCE_NVRAM_WRITE_SUPPORT opt_bce.h
SOCKBUF_DEBUG opt_global.h
# options for hifn driver
HIFN_DEBUG opt_hifn.h
HIFN_RNDTEST opt_hifn.h
# options for safenet driver
SAFE_DEBUG opt_safe.h
SAFE_NO_RNG opt_safe.h
SAFE_RNDTEST opt_safe.h
# syscons/vt options
MAXCONS opt_syscons.h
SC_ALT_MOUSE_IMAGE opt_syscons.h
SC_CUT_SPACES2TABS opt_syscons.h
SC_CUT_SEPCHARS opt_syscons.h
SC_DEBUG_LEVEL opt_syscons.h
SC_DFLT_FONT opt_syscons.h
SC_DFLT_TERM opt_syscons.h
SC_DISABLE_KDBKEY opt_syscons.h
SC_DISABLE_REBOOT opt_syscons.h
SC_HISTORY_SIZE opt_syscons.h
SC_KERNEL_CONS_ATTR opt_syscons.h
SC_KERNEL_CONS_ATTRS opt_syscons.h
SC_KERNEL_CONS_REV_ATTR opt_syscons.h
SC_MOUSE_CHAR opt_syscons.h
SC_NO_CUTPASTE opt_syscons.h
SC_NO_FONT_LOADING opt_syscons.h
SC_NO_HISTORY opt_syscons.h
SC_NO_MODE_CHANGE opt_syscons.h
SC_NO_SUSPEND_VTYSWITCH opt_syscons.h
SC_NO_SYSMOUSE opt_syscons.h
SC_NO_TERM_DUMB opt_syscons.h
SC_NO_TERM_SC opt_syscons.h
SC_NO_TERM_TEKEN opt_syscons.h
SC_NORM_ATTR opt_syscons.h
SC_NORM_REV_ATTR opt_syscons.h
SC_PIXEL_MODE opt_syscons.h
SC_RENDER_DEBUG opt_syscons.h
SC_TWOBUTTON_MOUSE opt_syscons.h
VT_ALT_TO_ESC_HACK opt_syscons.h
VT_FB_MAX_WIDTH opt_syscons.h
VT_FB_MAX_HEIGHT opt_syscons.h
VT_MAXWINDOWS opt_syscons.h
VT_TWOBUTTON_MOUSE opt_syscons.h
DEV_SC opt_syscons.h
DEV_VT opt_syscons.h
# teken terminal emulator options
TEKEN_CONS25 opt_teken.h
TEKEN_UTF8 opt_teken.h
TERMINAL_KERN_ATTR opt_teken.h
TERMINAL_NORM_ATTR opt_teken.h
# options for printf
PRINTF_BUFR_SIZE opt_printf.h
BOOT_TAG opt_printf.h
BOOT_TAG_SZ opt_printf.h
# kbd options
KBD_DISABLE_KEYMAP_LOAD opt_kbd.h
KBD_INSTALL_CDEV opt_kbd.h
KBD_MAXRETRY opt_kbd.h
KBD_MAXWAIT opt_kbd.h
KBD_RESETDELAY opt_kbd.h
KBD_DELAY1 opt_kbd.h
KBD_DELAY2 opt_kbd.h
KBDIO_DEBUG opt_kbd.h
KBDMUX_DFLT_KEYMAP opt_kbdmux.h
# options for the Atheros driver
ATH_DEBUG opt_ath.h
ATH_TXBUF opt_ath.h
ATH_RXBUF opt_ath.h
ATH_DIAGAPI opt_ath.h
ATH_TX99_DIAG opt_ath.h
ATH_ENABLE_DFS opt_ath.h
ATH_EEPROM_FIRMWARE opt_ath.h
ATH_ENABLE_RADIOTAP_VENDOR_EXT opt_ath.h
ATH_DEBUG_ALQ opt_ath.h
ATH_KTR_INTR_DEBUG opt_ath.h
AH_DEBUG opt_ah.h
AH_ASSERT opt_ah.h
AH_DEBUG_ALQ opt_ah.h
AH_REGOPS_FUNC opt_ah.h
AH_WRITE_REGDOMAIN opt_ah.h
AH_DEBUG_COUNTRY opt_ah.h
AH_WRITE_EEPROM opt_ah.h
AH_PRIVATE_DIAG opt_ah.h
AH_NEED_DESC_SWAP opt_ah.h
AH_USE_INIPDGAIN opt_ah.h
AH_MAXCHAN opt_ah.h
AH_RXCFG_SDMAMW_4BYTES opt_ah.h
AH_INTERRUPT_DEBUGGING opt_ah.h
# AR5416 and later interrupt mitigation
# XXX do not use this for AR9130
AH_AR5416_INTERRUPT_MITIGATION opt_ah.h
# options for the Altera mSGDMA driver (altera_msgdma)
ALTERA_MSGDMA_DESC_STD opt_altera_msgdma.h
ALTERA_MSGDMA_DESC_EXT opt_altera_msgdma.h
ALTERA_MSGDMA_DESC_PF_STD opt_altera_msgdma.h
ALTERA_MSGDMA_DESC_PF_EXT opt_altera_msgdma.h
# options for the Broadcom BCM43xx driver (bwi)
BWI_DEBUG opt_bwi.h
BWI_DEBUG_VERBOSE opt_bwi.h
# options for the Broadcom BCM43xx driver (bwn)
BWN_DEBUG opt_bwn.h
BWN_GPL_PHY opt_bwn.h
BWN_USE_SIBA opt_bwn.h
# Options for the SIBA driver
SIBA_DEBUG opt_siba.h
# options for the Marvell 8335 wireless driver
MALO_DEBUG opt_malo.h
MALO_TXBUF opt_malo.h
MALO_RXBUF opt_malo.h
# options for the Marvell wireless driver
MWL_DEBUG opt_mwl.h
MWL_TXBUF opt_mwl.h
MWL_RXBUF opt_mwl.h
MWL_DIAGAPI opt_mwl.h
MWL_AGGR_SIZE opt_mwl.h
MWL_TX_NODROP opt_mwl.h
# Options for the Marvell NETA driver
MVNETA_MULTIQUEUE opt_mvneta.h
MVNETA_KTR opt_mvneta.h
# Options for the Intel 802.11ac wireless driver
IWM_DEBUG opt_iwm.h
# Options for the Intel 802.11n wireless driver
IWN_DEBUG opt_iwn.h
# Options for the Intel 3945ABG wireless driver
WPI_DEBUG opt_wpi.h
# dcons options
DCONS_BUF_SIZE opt_dcons.h
DCONS_POLL_HZ opt_dcons.h
DCONS_FORCE_CONSOLE opt_dcons.h
DCONS_FORCE_GDB opt_dcons.h
# HWPMC options
HWPMC_DEBUG opt_global.h
HWPMC_HOOKS
# 802.11 support layer
IEEE80211_DEBUG opt_wlan.h
IEEE80211_DEBUG_REFCNT opt_wlan.h
IEEE80211_SUPPORT_MESH opt_wlan.h
IEEE80211_SUPPORT_SUPERG opt_wlan.h
IEEE80211_SUPPORT_TDMA opt_wlan.h
IEEE80211_ALQ opt_wlan.h
IEEE80211_DFS_DEBUG opt_wlan.h
# 802.11 TDMA support
TDMA_SLOTLEN_DEFAULT opt_tdma.h
TDMA_SLOTCNT_DEFAULT opt_tdma.h
TDMA_BINTVAL_DEFAULT opt_tdma.h
TDMA_TXRATE_11B_DEFAULT opt_tdma.h
TDMA_TXRATE_11G_DEFAULT opt_tdma.h
TDMA_TXRATE_11A_DEFAULT opt_tdma.h
TDMA_TXRATE_TURBO_DEFAULT opt_tdma.h
TDMA_TXRATE_HALF_DEFAULT opt_tdma.h
TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h
TDMA_TXRATE_11NA_DEFAULT opt_tdma.h
TDMA_TXRATE_11NG_DEFAULT opt_tdma.h
# VideoMode
PICKMODE_DEBUG opt_videomode.h
# Network stack virtualization options
VIMAGE opt_global.h
VNET_DEBUG opt_global.h
# Common Flash Interface (CFI) options
CFI_SUPPORT_STRATAFLASH opt_cfi.h
CFI_ARMEDANDDANGEROUS opt_cfi.h
CFI_HARDWAREBYTESWAP opt_cfi.h
# Sound options
SND_DEBUG opt_snd.h
SND_DIAGNOSTIC opt_snd.h
SND_FEEDER_MULTIFORMAT opt_snd.h
SND_FEEDER_FULL_MULTIFORMAT opt_snd.h
SND_FEEDER_RATE_HP opt_snd.h
SND_PCM_64 opt_snd.h
SND_OLDSTEREO opt_snd.h
X86BIOS
# Flattened device tree options
FDT opt_platform.h
FDT_DTB_STATIC opt_platform.h
# OFED Infiniband stack
OFED opt_ofed.h
OFED_DEBUG_INIT opt_ofed.h
SDP opt_ofed.h
SDP_DEBUG opt_ofed.h
IPOIB opt_ofed.h
IPOIB_DEBUG opt_ofed.h
IPOIB_CM opt_ofed.h
# Resource Accounting
RACCT opt_global.h
RACCT_DEFAULT_TO_DISABLED opt_global.h
# Resource Limits
RCTL opt_global.h
# Random number generator(s)
# Alternative RNG algorithm.
RANDOM_FENESTRASX opt_global.h
# With this, no entropy processor is loaded, but the entropy
# harvesting infrastructure is present. This means an entropy
# processor may be loaded as a module.
RANDOM_LOADABLE opt_global.h
# This turns on high-rate and potentially expensive harvesting in
# the uma slab allocator.
RANDOM_ENABLE_UMA opt_global.h
RANDOM_ENABLE_ETHER opt_global.h
# This option turns the TPM into an entropy source.
TPM_HARVEST opt_tpm.h
# BHND(4) driver
BHND_LOGLEVEL opt_global.h
# GPIO and child devices
GPIO_SPI_DEBUG opt_gpio.h
# SPI devices
SPIGEN_LEGACY_CDEVNAME opt_spi.h
# etherswitch(4) driver
RTL8366_SOFT_RESET opt_etherswitch.h
# evdev protocol support
EVDEV_SUPPORT opt_evdev.h
EVDEV_DEBUG opt_evdev.h
UINPUT_DEBUG opt_evdev.h
# Hyper-V network driver
HN_DEBUG opt_hn.h
# CAM-based MMC stack
MMCCAM
# Encrypted kernel crash dumps
EKCD opt_ekcd.h
# NVME options
NVME_USE_NVD opt_nvme.h
# amdsbwd options
AMDSBWD_DEBUG opt_amdsbwd.h
# gcov support
GCOV opt_global.h
LINDEBUGFS
# options for HID support
HID_DEBUG opt_hid.h
IICHID_DEBUG opt_hid.h
IICHID_SAMPLING opt_hid.h
HKBD_DFLT_KEYMAP opt_hkbd.h
HIDRAW_MAKE_UHID_ALIAS opt_hid.h
# kenv options
# The early kernel environment (loader environment, config(8)-provided static)
# is typically cleared after the dynamic environment comes up to ensure that
# we're not inadvertently holding on to 'secret' values in these stale envs.
# This option is insecure except in controlled environments where the static
# environment's contents are known to be safe.
PRESERVE_EARLY_KENV opt_global.h
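The only functional change in the hunk above is the new CPUGRP_SCORE entry. Because it is routed to opt_global.h, the macro becomes visible kernel-wide once a kernel configuration carries "options CPUGRP_SCORE". A minimal sketch (not part of the patch) of the resulting compile-time pattern, mirroring the #if defined(CPUGRP_SCORE) guards this patch adds to subr_smp.c below; the comments are illustrative only:

#include "opt_global.h"         /* defines CPUGRP_SCORE when the option is configured */

#if defined(CPUGRP_SCORE)
        /* score-aware topology code is compiled in */
#else
        /* without the option, references to the new cg_score field are expected to drop out */
#endif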
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 1f9577fddf9c..ec6b753cdf75 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -1,1352 +1,1358 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This module holds the global variables and machine independent functions
* used for the kernel SMP support.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include "opt_sched.h"
+#include "opt_global.h"
#ifdef SMP
MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data");
volatile cpuset_t stopped_cpus;
volatile cpuset_t started_cpus;
volatile cpuset_t suspended_cpus;
cpuset_t hlt_cpus_mask;
cpuset_t logical_cpus_mask;
void (*cpustop_restartfunc)(void);
#endif
static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);
/* This is used in modules that need to work in both SMP and UP. */
cpuset_t all_cpus;
int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;
volatile int smp_started;
u_int mp_maxid;
/* Array of CPU contexts saved during a panic. */
struct pcb *stoppcbs;
static SYSCTL_NODE(_kern, OID_AUTO, smp,
CTLFLAG_RD | CTLFLAG_CAPRD | CTLFLAG_MPSAFE, NULL,
"Kernel SMP");
SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
"Max CPU ID.");
SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
0, "Max number of CPUs that the system was compiled for.");
SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD|CTLTYPE_INT|CTLFLAG_MPSAFE,
NULL, 0, sysctl_kern_smp_active, "I",
"Indicates system is running in SMP mode");
int smp_disabled = 0; /* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&smp_disabled, 0, "SMP has been disabled from the loader");
int smp_cpus = 1; /* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
"Number of CPUs online");
int smp_threads_per_core = 1; /* how many SMT threads are running per core */
SYSCTL_INT(_kern_smp, OID_AUTO, threads_per_core, CTLFLAG_RD|CTLFLAG_CAPRD,
&smp_threads_per_core, 0, "Number of SMT threads online per core");
int mp_ncores = -1; /* how many physical cores running */
SYSCTL_INT(_kern_smp, OID_AUTO, cores, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncores, 0,
"Number of physical cores online");
int smp_topology = 0; /* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0,
"Topology override setting; 0 is default provided by hardware.");
#ifdef SMP
/* Variables needed for SMP rendezvous. */
static volatile int smp_rv_ncpus;
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;
static volatile int smp_rv_waiters[4];
/*
* Shared mutex to restrict busywaits between smp_rendezvous() and
* smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these
* functions trigger at once and cause multiple CPUs to busywait with
* interrupts disabled.
*/
struct mtx smp_ipi_mtx;
/*
* Let the MD SMP code initialize mp_maxid very early if it can.
*/
static void
mp_setmaxid(void *dummy)
{
cpu_mp_setmaxid();
KASSERT(mp_ncpus >= 1, ("%s: CPU count < 1", __func__));
KASSERT(mp_ncpus > 1 || mp_maxid == 0,
("%s: one CPU but mp_maxid is not zero", __func__));
KASSERT(mp_maxid >= mp_ncpus - 1,
("%s: counters out of sync: max %d, count %d", __func__,
mp_maxid, mp_ncpus));
cpusetsizemin = howmany(mp_maxid + 1, NBBY);
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
/*
* Call the MD SMP initialization code.
*/
static void
mp_start(void *dummy)
{
mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
/* Probe for MP hardware. */
if (smp_disabled != 0 || cpu_mp_probe() == 0) {
mp_ncores = 1;
mp_ncpus = 1;
CPU_SETOF(PCPU_GET(cpuid), &all_cpus);
return;
}
cpu_mp_start();
printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
mp_ncpus);
/* Provide a default for most architectures that don't have SMT/HTT. */
if (mp_ncores < 0)
mp_ncores = mp_ncpus;
stoppcbs = mallocarray(mp_maxid + 1, sizeof(struct pcb), M_DEVBUF,
M_WAITOK | M_ZERO);
cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
void
forward_signal(struct thread *td)
{
int id;
/*
* signotify() has already set TDA_AST and TDA_SIG on td_ast for
* this thread, so all we need to do is poke it if it is currently
* executing so that it executes ast().
*/
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(TD_IS_RUNNING(td),
("forward_signal: thread is not TDS_RUNNING"));
CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
if (!smp_started || cold || KERNEL_PANICKED())
return;
/* No need to IPI ourself. */
if (td == curthread)
return;
id = td->td_oncpu;
if (id == NOCPU)
return;
ipi_cpu(id, IPI_AST);
}
/*
* When called the executing CPU will send an IPI to all other CPUs
* requesting that they halt execution.
*
* Usually (but not necessarily) called with 'other_cpus' as its arg.
*
* - Signals all CPUs in map to stop.
* - Waits for each to stop.
*
* Returns:
* -1: error
* 0: NA
* 1: ok
*
*/
#if defined(__amd64__) || defined(__i386__)
#define X86 1
#else
#define X86 0
#endif
static int
generic_stop_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
static volatile u_int stopping_cpu = NOCPU;
int i;
volatile cpuset_t *cpus;
KASSERT(
type == IPI_STOP || type == IPI_STOP_HARD
#if X86
|| type == IPI_SUSPEND
#endif
, ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
cpusetobj_strprint(cpusetbuf, &map), type);
#if X86
/*
* When suspending, ensure there are no IPIs in progress.
* IPIs that have been issued, but not yet delivered (e.g.
* not pending on a vCPU when running under virtualization)
* will be lost, violating FreeBSD's assumption of reliable
* IPI delivery.
*/
if (type == IPI_SUSPEND)
mtx_lock_spin(&smp_ipi_mtx);
#endif
#if X86
if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
#endif
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
PCPU_GET(cpuid)) == 0)
while (stopping_cpu != NOCPU)
cpu_spinwait(); /* spin */
/* send the stop IPI to all CPUs in map */
ipi_selected(map, type);
#if X86
}
#endif
#if X86
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
#endif
cpus = &stopped_cpus;
i = 0;
while (!CPU_SUBSET(cpus, &map)) {
/* spin */
cpu_spinwait();
i++;
if (i == 100000000) {
printf("timeout stopping cpus\n");
break;
}
}
#if X86
if (type == IPI_SUSPEND)
mtx_unlock_spin(&smp_ipi_mtx);
#endif
stopping_cpu = NOCPU;
return (1);
}
int
stop_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP));
}
int
stop_cpus_hard(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP_HARD));
}
#if X86
int
suspend_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_SUSPEND));
}
#endif
/*
* Called by a CPU to restart stopped CPUs.
*
* Usually (but not necessarily) called with 'stopped_cpus' as its arg.
*
* - Signals all CPUs in map to restart.
* - Waits for each to restart.
*
* Returns:
* -1: error
* 0: NA
* 1: ok
*/
static int
generic_restart_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
volatile cpuset_t *cpus;
#if X86
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
|| type == IPI_SUSPEND, ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
if (type == IPI_SUSPEND)
cpus = &resuming_cpus;
else
cpus = &stopped_cpus;
/* signal other cpus to restart */
if (type == IPI_SUSPEND)
CPU_COPY_STORE_REL(&map, &toresume_cpus);
else
CPU_COPY_STORE_REL(&map, &started_cpus);
/*
* Wake up any CPUs stopped with MWAIT. From MI code we can't tell if
* MONITOR/MWAIT is enabled, but the potentially redundant writes are
* relatively inexpensive.
*/
if (type == IPI_STOP) {
struct monitorbuf *mb;
u_int id;
CPU_FOREACH(id) {
if (!CPU_ISSET(id, &map))
continue;
mb = &pcpu_find(id)->pc_monitorbuf;
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_RUNNING);
}
}
if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
}
#else /* !X86 */
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
cpus = &stopped_cpus;
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
#endif
return (1);
}
int
restart_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_STOP));
}
#if X86
int
resume_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_SUSPEND));
}
#endif
#undef X86
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function
* (if specified), rendezvous, execute the action function (if specified),
* rendezvous again, execute the teardown function (if specified), and then
* resume.
*
* Note that the supplied external functions _must_ be reentrant and aware
* that they are running in parallel and in an unknown lock context.
*/
void
smp_rendezvous_action(void)
{
struct thread *td;
void *local_func_arg;
void (*local_setup_func)(void*);
void (*local_action_func)(void*);
void (*local_teardown_func)(void*);
#ifdef INVARIANTS
int owepreempt;
#endif
/* Ensure we have up-to-date values. */
atomic_add_acq_int(&smp_rv_waiters[0], 1);
while (smp_rv_waiters[0] < smp_rv_ncpus)
cpu_spinwait();
/* Fetch rendezvous parameters after acquire barrier. */
local_func_arg = smp_rv_func_arg;
local_setup_func = smp_rv_setup_func;
local_action_func = smp_rv_action_func;
local_teardown_func = smp_rv_teardown_func;
/*
* Use a nested critical section to prevent any preemptions
* from occurring during a rendezvous action routine.
* Specifically, if a rendezvous handler is invoked via an IPI
* and the interrupted thread was in the critical_exit()
* function after setting td_critnest to 0 but before
* performing a deferred preemption, this routine can be
* invoked with td_critnest set to 0 and td_owepreempt true.
* In that case, a critical_exit() during the rendezvous
* action would trigger a preemption which is not permitted in
* a rendezvous action. To fix this, wrap all of the
* rendezvous action handlers in a critical section. We
* cannot use a regular critical section however as having
* critical_exit() preempt from this routine would also be
* problematic (the preemption must not occur before the IPI
* has been acknowledged via an EOI). Instead, we
* intentionally ignore td_owepreempt when leaving the
* critical section. This should be harmless because we do
* not permit rendezvous action routines to schedule threads,
* and thus td_owepreempt should never transition from 0 to 1
* during this routine.
*/
td = curthread;
td->td_critnest++;
#ifdef INVARIANTS
owepreempt = td->td_owepreempt;
#endif
/*
* If requested, run a setup function before the main action
* function. Ensure all CPUs have completed the setup
* function before moving on to the action function.
*/
if (local_setup_func != smp_no_rendezvous_barrier) {
if (local_setup_func != NULL)
local_setup_func(local_func_arg);
atomic_add_int(&smp_rv_waiters[1], 1);
while (smp_rv_waiters[1] < smp_rv_ncpus)
cpu_spinwait();
}
if (local_action_func != NULL)
local_action_func(local_func_arg);
if (local_teardown_func != smp_no_rendezvous_barrier) {
/*
* Signal that the main action has been completed. If a
* full exit rendezvous is requested, then all CPUs will
* wait here until all CPUs have finished the main action.
*/
atomic_add_int(&smp_rv_waiters[2], 1);
while (smp_rv_waiters[2] < smp_rv_ncpus)
cpu_spinwait();
if (local_teardown_func != NULL)
local_teardown_func(local_func_arg);
}
/*
* Signal that the rendezvous is fully completed by this CPU.
* This means that no member of smp_rv_* pseudo-structure will be
* accessed by this target CPU after this point; in particular,
* memory pointed by smp_rv_func_arg.
*
* The release semantic ensures that all accesses performed by
* the current CPU are visible when smp_rendezvous_cpus()
* returns, by synchronizing with the
* atomic_load_acq_int(&smp_rv_waiters[3]).
*/
atomic_add_rel_int(&smp_rv_waiters[3], 1);
td->td_critnest--;
KASSERT(owepreempt == td->td_owepreempt,
("rendezvous action changed td_owepreempt"));
}
void
smp_rendezvous_cpus(cpuset_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
int curcpumap, i, ncpus = 0;
/* See comments in the !SMP case. */
if (!smp_started) {
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
return;
}
/*
* Make sure we come here with interrupts enabled. Otherwise we
* livelock if smp_ipi_mtx is owned by a thread which sent us an IPI.
*/
MPASS(curthread->td_md.md_spinlock_count == 0);
CPU_FOREACH(i) {
if (CPU_ISSET(i, &map))
ncpus++;
}
if (ncpus == 0)
panic("ncpus is 0 with non-zero map");
mtx_lock_spin(&smp_ipi_mtx);
/* Pass rendezvous parameters via global variables. */
smp_rv_ncpus = ncpus;
smp_rv_setup_func = setup_func;
smp_rv_action_func = action_func;
smp_rv_teardown_func = teardown_func;
smp_rv_func_arg = arg;
smp_rv_waiters[1] = 0;
smp_rv_waiters[2] = 0;
smp_rv_waiters[3] = 0;
atomic_store_rel_int(&smp_rv_waiters[0], 0);
/*
* Signal other processors, which will enter the IPI with
* interrupts off.
*/
curcpumap = CPU_ISSET(curcpu, &map);
CPU_CLR(curcpu, &map);
ipi_selected(map, IPI_RENDEZVOUS);
/* Check if the current CPU is in the map */
if (curcpumap != 0)
smp_rendezvous_action();
/*
* Ensure that the master CPU waits for all the other
* CPUs to finish the rendezvous, so that smp_rv_*
* pseudo-structure and the arg are guaranteed to not
* be in use.
*
* Load acquire synchronizes with the release add in
* smp_rendezvous_action(), which ensures that our caller sees
* all memory actions done by the called functions on other
* CPUs.
*/
while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
cpu_spinwait();
mtx_unlock_spin(&smp_ipi_mtx);
}
void
smp_rendezvous(void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
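As context (not part of the patch), a minimal sketch of how a caller typically uses smp_rendezvous(); it mirrors cpus_fence_seq_cst() further down in this file and passes smp_no_rendezvous_barrier to skip the setup and teardown phases. example_action is a placeholder name.

static void
example_action(void *arg __unused)
{
        /* Runs on every CPU in parallel; must be reentrant and must not
         * assume any particular lock context (see the comment above
         * smp_rendezvous_action()). */
        atomic_thread_fence_seq_cst();
}

        /* ... from some machine-independent code path ... */
        smp_rendezvous(smp_no_rendezvous_barrier, example_action,
            smp_no_rendezvous_barrier, NULL);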
static void
smp_topo_fill(struct cpu_group *cg)
{
int c;
for (c = 0; c < cg->cg_children; c++)
smp_topo_fill(&cg->cg_child[c]);
cg->cg_first = CPU_FFS(&cg->cg_mask) - 1;
cg->cg_last = CPU_FLS(&cg->cg_mask) - 1;
}
struct cpu_group *
smp_topo(void)
{
char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
static struct cpu_group *top = NULL;
/*
* The first call to smp_topo() is guaranteed to occur
* during the kernel boot while we are still single-threaded.
*/
if (top != NULL)
return (top);
/*
* Check for a fake topology request for debugging purposes.
*/
switch (smp_topology) {
case 1:
/* Dual core with no sharing. */
top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
break;
case 2:
/* No topology, all cpus are equal. */
top = smp_topo_none();
break;
case 3:
/* Dual core with shared L2. */
top = smp_topo_1level(CG_SHARE_L2, 2, 0);
break;
case 4:
/* quad core, shared l3 among each package, private l2. */
top = smp_topo_1level(CG_SHARE_L3, 4, 0);
break;
case 5:
/* quad core, 2 dualcore parts on each package share l2. */
top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
break;
case 6:
/* Single-core 2xHTT */
top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
break;
case 7:
/* quad core with a shared l3, 8 threads sharing L2. */
top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
CG_FLAG_SMT);
break;
default:
/* Default, ask the system what it wants. */
top = cpu_topo();
break;
}
/*
* Verify the returned topology.
*/
if (top->cg_count != mp_ncpus)
panic("Built bad topology at %p. CPU count %d != %d",
top, top->cg_count, mp_ncpus);
if (CPU_CMP(&top->cg_mask, &all_cpus))
panic("Built bad topology at %p. CPU mask (%s) != (%s)",
top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
cpusetobj_strprint(cpusetbuf2, &all_cpus));
/*
* Collapse nonsense levels that may be created out of convenience by
* the MD layers. They cause extra work in the search functions.
*/
while (top->cg_children == 1) {
top = &top->cg_child[0];
top->cg_parent = NULL;
}
smp_topo_fill(top);
return (top);
}
struct cpu_group *
smp_topo_alloc(u_int count)
{
static struct cpu_group *group = NULL;
static u_int index;
u_int curr;
if (group == NULL) {
group = mallocarray((mp_maxid + 1) * MAX_CACHE_LEVELS + 1,
sizeof(*group), M_DEVBUF, M_WAITOK | M_ZERO);
}
curr = index;
index += count;
return (&group[curr]);
}
struct cpu_group *
smp_topo_none(void)
{
struct cpu_group *top;
top = smp_topo_alloc(1);
top->cg_parent = NULL;
top->cg_child = NULL;
top->cg_mask = all_cpus;
top->cg_count = mp_ncpus;
top->cg_children = 0;
top->cg_level = CG_SHARE_NONE;
top->cg_flags = 0;
-
+#if defined(CPUGRP_SCORE)
+ memset(top->cg_score, CG_SCORE_DEFAULT, sizeof(top->cg_score));
+#endif
return (top);
}
static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
int count, int flags, int start)
{
char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
cpuset_t mask;
int i;
CPU_ZERO(&mask);
for (i = 0; i < count; i++, start++)
CPU_SET(start, &mask);
child->cg_parent = parent;
child->cg_child = NULL;
child->cg_children = 0;
child->cg_level = share;
child->cg_count = count;
child->cg_flags = flags;
child->cg_mask = mask;
+#if defined(CPUGRP_SCORE)
+ memset(child->cg_score, CG_SCORE_DEFAULT, sizeof(child->cg_score));
+#endif
parent->cg_children++;
for (; parent != NULL; parent = parent->cg_parent) {
if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
panic("Duplicate children in %p. mask (%s) child (%s)",
parent,
cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
CPU_OR(&parent->cg_mask, &parent->cg_mask, &child->cg_mask);
parent->cg_count += child->cg_count;
}
return (start);
}
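Both smp_topo_none() above and smp_topo_addleaf() here now seed the new cg_score array with CG_SCORE_DEFAULT, so every group starts from a known neutral score as soon as the topology is built. A hedged sketch of the same idea applied to a whole subtree; smp_topo_score_reset() is a hypothetical helper (not in the patch) that follows the same recursion shape as smp_topo_fill() above:

#if defined(CPUGRP_SCORE)
static void
smp_topo_score_reset(struct cpu_group *cg)
{
        int c;

        /* Reset this group, then recurse into its children. */
        memset(cg->cg_score, CG_SCORE_DEFAULT, sizeof(cg->cg_score));
        for (c = 0; c < cg->cg_children; c++)
                smp_topo_score_reset(&cg->cg_child[c]);
}
#endif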
struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
struct cpu_group *child;
struct cpu_group *top;
int packages;
int cpu;
int i;
cpu = 0;
packages = mp_ncpus / count;
top = smp_topo_alloc(1 + packages);
top->cg_child = child = top + 1;
top->cg_level = CG_SHARE_NONE;
for (i = 0; i < packages; i++, child++)
cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
return (top);
}
struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
int l1flags)
{
struct cpu_group *top;
struct cpu_group *l1g;
struct cpu_group *l2g;
int cpu;
int i;
int j;
cpu = 0;
top = smp_topo_alloc(1 + mp_ncpus / (l2count * l1count) +
mp_ncpus / l1count);
l2g = top + 1;
top->cg_child = l2g;
top->cg_level = CG_SHARE_NONE;
top->cg_children = mp_ncpus / (l2count * l1count);
l1g = l2g + top->cg_children;
for (i = 0; i < top->cg_children; i++, l2g++) {
l2g->cg_parent = top;
l2g->cg_child = l1g;
l2g->cg_level = l2share;
for (j = 0; j < l2count; j++, l1g++)
cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
l1flags, cpu);
}
return (top);
}
struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
struct cpu_group *cg;
cpuset_t mask;
int children;
int i;
CPU_SETOF(cpu, &mask);
cg = top;
for (;;) {
if (!CPU_OVERLAP(&cg->cg_mask, &mask))
return (NULL);
if (cg->cg_children == 0)
return (cg);
children = cg->cg_children;
for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
if (CPU_OVERLAP(&cg->cg_mask, &mask))
break;
}
return (NULL);
}
#else /* !SMP */
void
smp_rendezvous_cpus(cpuset_t map,
void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
/*
* In the !SMP case we just need to ensure the same initial conditions
* as the SMP case.
*/
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
}
void
smp_rendezvous(void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func,
arg);
}
/*
* Provide dummy SMP support for UP kernels. Modules that need to use SMP
* APIs will still work using this dummy support.
*/
static void
mp_setvariables_for_up(void *dummy)
{
mp_ncpus = 1;
mp_ncores = 1;
mp_maxid = PCPU_GET(cpuid);
CPU_SETOF(mp_maxid, &all_cpus);
KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
mp_setvariables_for_up, NULL);
#endif /* SMP */
void
smp_no_rendezvous_barrier(void *dummy)
{
#ifdef SMP
KASSERT((!smp_started),("smp_no_rendezvous called and smp is started"));
#endif
}
void
smp_rendezvous_cpus_retry(cpuset_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void (* wait_func)(void *, int),
struct smp_rendezvous_cpus_retry_arg *arg)
{
int cpu;
CPU_COPY(&map, &arg->cpus);
/*
* Only one CPU to execute on.
*/
if (!smp_started) {
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
return;
}
/*
* Execute an action on all specified CPUs while retrying until they
* all acknowledge completion.
*/
for (;;) {
smp_rendezvous_cpus(
arg->cpus,
setup_func,
action_func,
teardown_func,
arg);
if (CPU_EMPTY(&arg->cpus))
break;
CPU_FOREACH(cpu) {
if (!CPU_ISSET(cpu, &arg->cpus))
continue;
wait_func(arg, cpu);
}
}
}
void
smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *arg)
{
CPU_CLR_ATOMIC(curcpu, &arg->cpus);
}
/*
* If (prio & PDROP) == 0:
* Wait for specified idle threads to switch once. This ensures that even
* preempted threads have cycled through the switch function once,
* exiting their codepaths. This allows us to change global pointers
* with no other synchronization.
* If (prio & PDROP) != 0:
* Force the specified CPUs to switch context at least once.
*/
int
quiesce_cpus(cpuset_t map, const char *wmesg, int prio)
{
struct pcpu *pcpu;
u_int *gen;
int error;
int cpu;
error = 0;
if ((prio & PDROP) == 0) {
gen = mallocarray(sizeof(u_int), mp_maxid + 1, M_TEMP,
M_WAITOK);
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
continue;
pcpu = pcpu_find(cpu);
gen[cpu] = pcpu->pc_idlethread->td_generation;
}
}
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
continue;
pcpu = pcpu_find(cpu);
thread_lock(curthread);
sched_bind(curthread, cpu);
thread_unlock(curthread);
if ((prio & PDROP) != 0)
continue;
while (gen[cpu] == pcpu->pc_idlethread->td_generation) {
error = tsleep(quiesce_cpus, prio & ~PDROP, wmesg, 1);
if (error != EWOULDBLOCK)
goto out;
error = 0;
}
}
out:
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
if ((prio & PDROP) == 0)
free(gen, M_TEMP);
return (error);
}
int
quiesce_all_cpus(const char *wmesg, int prio)
{
return quiesce_cpus(all_cpus, wmesg, prio);
}
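The comment above quiesce_cpus() describes the non-PDROP mode: wait until every targeted CPU's idle thread has switched once, after which even preempted threads have left their old code paths. As context (not part of the patch), a minimal sketch of the classic use, replacing a global pointer; every name here is a placeholder:

static char *global_cfg;                /* placeholder global, illustrative only */

static void
replace_cfg(char *new_cfg)
{
        char *old;

        old = global_cfg;
        global_cfg = new_cfg;           /* publish the new pointer */
        quiesce_all_cpus("cfgrepl", 0); /* wait for every CPU to switch once */
        free(old, M_TEMP);              /* no CPU can still be using it */
}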
/*
* Observe all CPUs not executing in critical section.
* We are not in one so the check for us is safe. If the found
* thread changes to something else we know the section was
* exited as well.
*/
void
quiesce_all_critical(void)
{
struct thread *td, *newtd;
struct pcpu *pcpu;
int cpu;
MPASS(curthread->td_critnest == 0);
CPU_FOREACH(cpu) {
pcpu = cpuid_to_pcpu[cpu];
td = pcpu->pc_curthread;
for (;;) {
if (td->td_critnest == 0)
break;
cpu_spinwait();
newtd = (struct thread *)
atomic_load_acq_ptr((void *)pcpu->pc_curthread);
if (td != newtd)
break;
}
}
}
static void
cpus_fence_seq_cst_issue(void *arg __unused)
{
atomic_thread_fence_seq_cst();
}
/*
* Send an IPI forcing a sequentially consistent fence.
*
* Allows replacement of an explicit fence with a compiler barrier.
* Trades speed up during normal execution for a significant slowdown when
* the barrier is needed.
*/
void
cpus_fence_seq_cst(void)
{
#ifdef SMP
smp_rendezvous(
smp_no_rendezvous_barrier,
cpus_fence_seq_cst_issue,
smp_no_rendezvous_barrier,
NULL
);
#else
cpus_fence_seq_cst_issue(NULL);
#endif
}
/* Extra care is taken with this sysctl because the data type is volatile */
static int
sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
{
int error, active;
active = smp_started;
error = SYSCTL_OUT(req, &active, sizeof(active));
return (error);
}
#ifdef SMP
void
topo_init_node(struct topo_node *node)
{
bzero(node, sizeof(*node));
TAILQ_INIT(&node->children);
}
void
topo_init_root(struct topo_node *root)
{
topo_init_node(root);
root->type = TOPO_TYPE_SYSTEM;
}
/*
* Add a child node with the given ID under the given parent.
* Do nothing if there is already a child with that ID.
*/
struct topo_node *
topo_add_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype)
{
struct topo_node *node;
TAILQ_FOREACH_REVERSE(node, &parent->children,
topo_children, siblings) {
if (node->hwid == hwid
&& node->type == type && node->subtype == subtype) {
return (node);
}
}
node = malloc(sizeof(*node), M_TOPO, M_WAITOK);
topo_init_node(node);
node->parent = parent;
node->hwid = hwid;
node->type = type;
node->subtype = subtype;
TAILQ_INSERT_TAIL(&parent->children, node, siblings);
parent->nchildren++;
return (node);
}
/*
* Find a child node with the given ID under the given parent.
*/
struct topo_node *
topo_find_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype)
{
struct topo_node *node;
TAILQ_FOREACH(node, &parent->children, siblings) {
if (node->hwid == hwid
&& node->type == type && node->subtype == subtype) {
return (node);
}
}
return (NULL);
}
/*
* Given a node, change the order of its parent's child nodes such
* that the node becomes the first child while preserving the cyclic
* order of the children. In other words, the given node is promoted
* by rotation.
*/
void
topo_promote_child(struct topo_node *child)
{
struct topo_node *next;
struct topo_node *node;
struct topo_node *parent;
parent = child->parent;
next = TAILQ_NEXT(child, siblings);
TAILQ_REMOVE(&parent->children, child, siblings);
TAILQ_INSERT_HEAD(&parent->children, child, siblings);
while (next != NULL) {
node = next;
next = TAILQ_NEXT(node, siblings);
TAILQ_REMOVE(&parent->children, node, siblings);
TAILQ_INSERT_AFTER(&parent->children, child, node, siblings);
child = node;
}
}
/*
* Iterate to the next node in the depth-first search (traversal) of
* the topology tree.
*/
struct topo_node *
topo_next_node(struct topo_node *top, struct topo_node *node)
{
struct topo_node *next;
if ((next = TAILQ_FIRST(&node->children)) != NULL)
return (next);
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
while (node != top && (node = node->parent) != top)
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
return (NULL);
}
/*
* Iterate to the next node in the depth-first search of the topology tree,
* but without descending below the current node.
*/
struct topo_node *
topo_next_nonchild_node(struct topo_node *top, struct topo_node *node)
{
struct topo_node *next;
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
while (node != top && (node = node->parent) != top)
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
return (NULL);
}
/*
* Assign the given ID to the given topology node that represents a logical
* processor.
*/
void
topo_set_pu_id(struct topo_node *node, cpuid_t id)
{
KASSERT(node->type == TOPO_TYPE_PU,
("topo_set_pu_id: wrong node type: %u", node->type));
KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0,
("topo_set_pu_id: cpuset already not empty"));
node->id = id;
CPU_SET(id, &node->cpuset);
node->cpu_count = 1;
node->subtype = 1;
while ((node = node->parent) != NULL) {
KASSERT(!CPU_ISSET(id, &node->cpuset),
("logical ID %u is already set in node %p", id, node));
CPU_SET(id, &node->cpuset);
node->cpu_count++;
}
}
static struct topology_spec {
topo_node_type type;
bool match_subtype;
uintptr_t subtype;
} topology_level_table[TOPO_LEVEL_COUNT] = {
[TOPO_LEVEL_PKG] = { .type = TOPO_TYPE_PKG, },
[TOPO_LEVEL_GROUP] = { .type = TOPO_TYPE_GROUP, },
[TOPO_LEVEL_CACHEGROUP] = {
.type = TOPO_TYPE_CACHE,
.match_subtype = true,
.subtype = CG_SHARE_L3,
},
[TOPO_LEVEL_CORE] = { .type = TOPO_TYPE_CORE, },
[TOPO_LEVEL_THREAD] = { .type = TOPO_TYPE_PU, },
};
static bool
topo_analyze_table(struct topo_node *root, int all, enum topo_level level,
struct topo_analysis *results)
{
struct topology_spec *spec;
struct topo_node *node;
int count;
if (level >= TOPO_LEVEL_COUNT)
return (true);
spec = &topology_level_table[level];
count = 0;
node = topo_next_node(root, root);
while (node != NULL) {
if (node->type != spec->type ||
(spec->match_subtype && node->subtype != spec->subtype)) {
node = topo_next_node(root, node);
continue;
}
if (!all && CPU_EMPTY(&node->cpuset)) {
node = topo_next_nonchild_node(root, node);
continue;
}
count++;
if (!topo_analyze_table(node, all, level + 1, results))
return (false);
node = topo_next_nonchild_node(root, node);
}
/* No explicit subgroups is essentially one subgroup. */
if (count == 0) {
count = 1;
if (!topo_analyze_table(root, all, level + 1, results))
return (false);
}
if (results->entities[level] == -1)
results->entities[level] = count;
else if (results->entities[level] != count)
return (false);
return (true);
}
/*
* Check if the topology is uniform, that is, each package has the same number
* of cores in it and each core has the same number of threads (logical
* processors) in it. If so, calculate the number of packages, the number of
* groups per package, the number of cachegroups per group, and the number of
* logical processors per cachegroup. The 'all' parameter tells whether to include
* administratively disabled logical processors into the analysis.
*/
int
topo_analyze(struct topo_node *topo_root, int all,
struct topo_analysis *results)
{
results->entities[TOPO_LEVEL_PKG] = -1;
results->entities[TOPO_LEVEL_CORE] = -1;
results->entities[TOPO_LEVEL_THREAD] = -1;
results->entities[TOPO_LEVEL_GROUP] = -1;
results->entities[TOPO_LEVEL_CACHEGROUP] = -1;
if (!topo_analyze_table(topo_root, all, TOPO_LEVEL_PKG, results))
return (0);
KASSERT(results->entities[TOPO_LEVEL_PKG] > 0,
("bug in topology or analysis"));
return (1);
}
#endif /* SMP */
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 252dc9dc1cae..735cad0439a3 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -1,298 +1,311 @@
/*-
* SPDX-License-Identifier: Beerware
*
* ----------------------------------------------------------------------------
* "THE BEER-WARE LICENSE" (Revision 42):
* <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
* can do whatever you want with this stuff. If we meet some day, and you think
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
*/
#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_
#ifdef _KERNEL
#ifndef LOCORE
#include <sys/cpuset.h>
#include <sys/queue.h>
+#include "opt_global.h"
+
/*
* Types of nodes in the topological tree.
*/
typedef enum {
/* No node has this type; can be used in topo API calls. */
TOPO_TYPE_DUMMY,
/* Processing unit aka computing unit aka logical CPU. */
TOPO_TYPE_PU,
/* Physical subdivision of a package. */
TOPO_TYPE_CORE,
/* CPU L1/L2/L3 cache. */
TOPO_TYPE_CACHE,
/* Package aka chip, equivalent to socket. */
TOPO_TYPE_PKG,
/* NUMA node. */
TOPO_TYPE_NODE,
/* Other logical or physical grouping of PUs. */
/* E.g. PUs on the same die, or PUs sharing an FPU. */
TOPO_TYPE_GROUP,
/* The whole system. */
TOPO_TYPE_SYSTEM
} topo_node_type;
/* Hardware identifier of a topology component. */
typedef unsigned int hwid_t;
/* Logical CPU identifier. */
typedef int cpuid_t;
/* A node in the topology. */
struct topo_node {
struct topo_node *parent;
TAILQ_HEAD(topo_children, topo_node) children;
TAILQ_ENTRY(topo_node) siblings;
cpuset_t cpuset;
topo_node_type type;
uintptr_t subtype;
hwid_t hwid;
cpuid_t id;
int nchildren;
int cpu_count;
};
/*
* Scheduling topology of a NUMA or SMP system.
*
* The top level topology is an array of pointers to groups. Each group
* contains a bitmask of cpus in its group or subgroups. It may also
* contain a pointer to an array of child groups.
*
* The bitmasks at non leaf groups may be used by consumers who support
* a smaller depth than the hardware provides.
*
* The topology may be omitted by systems where all CPUs are equal.
*/
+#if defined(CPUGRP_SCORE)
+#define CG_SCORE_CLASS_MAX 8
+#define CG_SCORE_CAPABILITY_MAX 2
+
+#define CG_SCORE_DEFAULT 0x80
+#endif
+
struct cpu_group {
struct cpu_group *cg_parent; /* Our parent group. */
struct cpu_group *cg_child; /* Optional children groups. */
cpuset_t cg_mask; /* Mask of cpus in this group. */
int32_t cg_count; /* Count of cpus in this group. */
int32_t cg_first; /* First cpu in this group. */
int32_t cg_last; /* Last cpu in this group. */
int16_t cg_children; /* Number of children groups. */
int8_t cg_level; /* Shared cache level. */
int8_t cg_flags; /* Traversal modifiers. */
+#if defined(CPUGRP_SCORE)
+ uint8_t cg_score[CG_SCORE_CLASS_MAX][CG_SCORE_CAPABILITY_MAX];
+ /* Performance/Efficiency Score from Intel HFI/ITD */
+#endif
};
typedef struct cpu_group *cpu_group_t;
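/*
 * Illustrative sketch (editor's addition, not part of the patch): a
 * bounds-checked accessor for the cg_score field added above. The helper
 * name is hypothetical, and the interpretation of the capability index
 * (e.g. 0 = performance, 1 = efficiency) is an assumption; the patch itself
 * only defines the array bounds and the default value.
 */
#if defined(CPUGRP_SCORE)
static __inline uint8_t
example_cg_score(const struct cpu_group *cg, int class_id, int capability)
{
	if (class_id < 0 || class_id >= CG_SCORE_CLASS_MAX ||
	    capability < 0 || capability >= CG_SCORE_CAPABILITY_MAX)
		return (CG_SCORE_DEFAULT);
	return (cg->cg_score[class_id][capability]);
}
#endif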
/*
* Defines common resources for CPUs in the group. The highest level
* resource should be used when multiple are shared.
*/
#define CG_SHARE_NONE 0
#define CG_SHARE_L1 1
#define CG_SHARE_L2 2
#define CG_SHARE_L3 3
#define MAX_CACHE_LEVELS CG_SHARE_L3
/*
* Behavior modifiers for load balancing and affinity.
*/
#define CG_FLAG_HTT 0x01 /* Schedule the alternate core last. */
#define CG_FLAG_SMT 0x02 /* New age htt, less crippled. */
#define CG_FLAG_THREAD (CG_FLAG_HTT | CG_FLAG_SMT) /* Any threading. */
#define CG_FLAG_NODE 0x04 /* NUMA node. */
/*
* Convenience routines for building and traversing topologies.
*/
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);
enum topo_level {
TOPO_LEVEL_PKG = 0,
/*
* Some systems have useful sub-package core organizations. On these,
* a package has one or more subgroups. Each subgroup contains one or
* more cache groups (cores that share a last level cache).
*/
TOPO_LEVEL_GROUP,
TOPO_LEVEL_CACHEGROUP,
TOPO_LEVEL_CORE,
TOPO_LEVEL_THREAD,
TOPO_LEVEL_COUNT /* Must be last */
};
struct topo_analysis {
int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
struct topo_analysis *results);
#define TOPO_FOREACH(i, root) \
for (i = root; i != NULL; i = topo_next_node(root, i))
struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus; /* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus; /* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus; /* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus; /* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask; /* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */
extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;
extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM]; /* CPUs in each NUMA domain. */
struct pcb;
extern struct pcb *stoppcbs;
/*
* Macro allowing us to determine whether a CPU is absent at any given
* time, thus permitting us to configure sparse maps of cpuid-dependent
* (per-CPU) structures.
*/
#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus))
/*
* Macros to iterate over non-absent CPUs. CPU_FOREACH() takes an
* integer iterator and iterates over the available set of CPUs.
* CPU_FIRST() returns the id of the first non-absent CPU. CPU_NEXT()
* returns the id of the next non-absent CPU. It will wrap back to
* CPU_FIRST() once the end of the list is reached. The iterators are
* currently implemented via inline functions.
*/
#define CPU_FOREACH(i) \
for ((i) = 0; (i) <= mp_maxid; (i)++) \
if (!CPU_ABSENT((i)))
static __inline int
cpu_first(void)
{
int i;
for (i = 0;; i++)
if (!CPU_ABSENT(i))
return (i);
}
static __inline int
cpu_next(int i)
{
for (;;) {
i++;
if ((u_int)i > mp_maxid)
i = 0;
if (!CPU_ABSENT(i))
return (i);
}
}
#define CPU_FIRST() cpu_first()
#define CPU_NEXT(i) cpu_next((i))
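/*
 * Illustrative sketch (editor's addition, not part of the patch): a typical
 * use of the iterator above, counting the non-absent CPUs. The helper name
 * is hypothetical.
 */
static __inline int
example_cpu_count(void)
{
	int cpu, count;

	count = 0;
	CPU_FOREACH(cpu)
		count++;
	return (count);
}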
#ifdef SMP
/*
* Machine dependent functions used to initialize MP support.
*
* The cpu_mp_probe() should check to see if MP support is present and return
* zero if it is not or non-zero if it is. If MP support is present, then
* cpu_mp_start() will be called so that MP can be enabled. This function
* should do things such as startup secondary processors. It should also
* setup mp_ncpus, all_cpus, and smp_cpus. It should also ensure that
* smp_started is initialized at the appropriate time.
* Once cpu_mp_start() returns, machine independent MP startup code will be
* executed and a simple message will be output to the console. Finally,
* cpu_mp_announce() will be called so that machine dependent messages about
* the MP support may be output to the console if desired.
*
* The cpu_setmaxid() function is called very early during the boot process
* so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
* that other subsystems may use. If a platform is not able to determine
* the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
*/
struct thread;
struct cpu_group *cpu_topo(void);
void cpu_mp_announce(void);
int cpu_mp_probe(void);
void cpu_mp_setmaxid(void);
void cpu_mp_start(void);
void forward_signal(struct thread *);
int restart_cpus(cpuset_t);
int stop_cpus(cpuset_t);
int stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int suspend_cpus(cpuset_t);
int resume_cpus(cpuset_t);
#endif
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;
#endif /* SMP */
int quiesce_all_cpus(const char *, int);
int quiesce_cpus(cpuset_t, const char *, int);
void quiesce_all_critical(void);
void cpus_fence_seq_cst(void);
void smp_no_rendezvous_barrier(void *);
void smp_rendezvous(void (*)(void *),
void (*)(void *),
void (*)(void *),
void *arg);
void smp_rendezvous_cpus(cpuset_t,
void (*)(void *),
void (*)(void *),
void (*)(void *),
void *arg);
struct smp_rendezvous_cpus_retry_arg {
cpuset_t cpus;
};
void smp_rendezvous_cpus_retry(cpuset_t,
void (*)(void *),
void (*)(void *),
void (*)(void *),
void (*)(void *, int),
struct smp_rendezvous_cpus_retry_arg *);
void smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);
#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index 1027c2c8972b..5d9a57c8febe 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1,1749 +1,1757 @@
/*-
* Copyright (c) 1996, by Steve Passe
* Copyright (c) 2003, by Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. The name of the developer may NOT be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include "opt_acpi.h"
#ifdef __i386__
#include "opt_apic.h"
#endif
#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_gdb.h"
#include "opt_kstack_pages.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"
#include "opt_stack.h"
+#include "opt_global.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/bus.h>
#include <sys/cons.h> /* cngetc() */
#include <sys/cpuset.h>
#include <sys/csan.h>
#include <sys/interrupt.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/stack.h>
#include <x86/ucode.h>
#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
#endif
static MALLOC_DEFINE(M_CPUS, "cpus", "CPU items");
int mp_naps; /* # of Application processors */
int boot_cpu_id = -1; /* designated BSP */
/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
int bootAP;
/* Free these after use */
void *bootstacks[MAXCPU];
void *dpcpu;
struct susppcb **susppcbs;
#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
#endif
/* Default cpu_ops implementation. */
struct cpu_ops cpu_ops;
/*
* Local data and functions.
*/
static volatile cpuset_t ipi_stop_nmi_pending;
volatile cpuset_t resuming_cpus;
volatile cpuset_t toresume_cpus;
/* used to hold the AP's until we are ready to release them */
struct mtx ap_boot_mtx;
/* Set to 1 once we're ready to let the APs out of the pen. */
volatile int aps_ready = 0;
/*
* Store data from cpu_add() until later in the boot when we actually setup
* the APs.
*/
struct cpu_info *cpu_info;
int *apic_cpuids;
int cpu_apic_ids[MAXCPU];
_Static_assert(MAXCPU <= MAX_APIC_ID,
"MAXCPU cannot be larger that MAX_APIC_ID");
_Static_assert(xAPIC_MAX_APIC_ID <= MAX_APIC_ID,
"xAPIC_MAX_APIC_ID cannot be larger that MAX_APIC_ID");
static void release_aps(void *dummy);
static void cpustop_handler_post(u_int cpu);
static int hyperthreading_allowed = 1;
SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN,
&hyperthreading_allowed, 0, "Use Intel HTT logical CPUs");
static int hyperthreading_intr_allowed = 0;
SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_intr_allowed, CTLFLAG_RDTUN,
&hyperthreading_intr_allowed, 0,
"Allow interrupts on HTT logical CPUs");
static int intr_apic_id_limit = -1;
SYSCTL_INT(_machdep, OID_AUTO, intr_apic_id_limit, CTLFLAG_RDTUN,
&intr_apic_id_limit, 0,
"Maximum permitted APIC ID for interrupt delivery (-1 is unlimited)");
static struct topo_node topo_root;
static int pkg_id_shift;
static int node_id_shift;
static int core_id_shift;
static int disabled_cpus;
struct cache_info {
int id_shift;
int present;
} static caches[MAX_CACHE_LEVELS];
static bool stop_mwait = false;
SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
"Use MONITOR/MWAIT when stopping CPU, if available");
void
mem_range_AP_init(void)
{
if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
mem_range_softc.mr_op->initAP(&mem_range_softc);
}
/*
* Round up to the next power of two, if necessary, and then
* take log2.
* Returns -1 if argument is zero.
*/
static __inline int
mask_width(u_int x)
{
return (fls(x << (1 - powerof2(x))) - 1);
}
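/*
 * Editor's note (illustrative, not part of the patch): e.g.
 * mask_width(1) == 0, mask_width(2) == 1, mask_width(6) == 3
 * (6 rounds up to 8), and mask_width(0) == -1.
 */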
/*
* Add a cache level to the cache topology description.
*/
static int
add_deterministic_cache(int type, int level, int share_count)
{
if (type == 0)
return (0);
if (type > 3) {
printf("unexpected cache type %d\n", type);
return (1);
}
if (type == 2) /* ignore instruction cache */
return (1);
if (level == 0 || level > MAX_CACHE_LEVELS) {
printf("unexpected cache level %d\n", level);
return (1);
}
if (caches[level - 1].present) {
printf("WARNING: multiple entries for L%u data cache\n", level);
printf("%u => %u\n", caches[level - 1].id_shift,
mask_width(share_count));
}
caches[level - 1].id_shift = mask_width(share_count);
caches[level - 1].present = 1;
if (caches[level - 1].id_shift > pkg_id_shift) {
printf("WARNING: L%u data cache covers more "
"APIC IDs than a package (%u > %u)\n", level,
caches[level - 1].id_shift, pkg_id_shift);
caches[level - 1].id_shift = pkg_id_shift;
}
if (caches[level - 1].id_shift < core_id_shift) {
printf("WARNING: L%u data cache covers fewer "
"APIC IDs than a core (%u < %u)\n", level,
caches[level - 1].id_shift, core_id_shift);
caches[level - 1].id_shift = core_id_shift;
}
return (1);
}
/*
* Determine topology of processing units and caches for AMD CPUs.
* See:
* - AMD CPUID Specification (Publication # 25481)
* - BKDG for AMD NPT Family 0Fh Processors (Publication # 32559)
* - BKDG For AMD Family 10h Processors (Publication # 31116)
* - BKDG For AMD Family 15h Models 00h-0Fh Processors (Publication # 42301)
* - BKDG For AMD Family 16h Models 00h-0Fh Processors (Publication # 48751)
* - PPR For AMD Family 17h Models 00h-0Fh Processors (Publication # 54945)
*/
static void
topo_probe_amd(void)
{
u_int p[4];
uint64_t v;
int level;
int nodes_per_socket;
int share_count;
int type;
int i;
/* No multi-core capability. */
if ((amd_feature2 & AMDID2_CMP) == 0)
return;
/*
* XXX Lack of an AMD IOMMU driver prevents use of APIC IDs above
* xAPIC_MAX_APIC_ID. This is a workaround so we boot and function on
* AMD systems with high thread counts, albeit with reduced interrupt
* performance.
*
* We should really set the limit to xAPIC_MAX_APIC_ID by default, and
* have the IOMMU driver increase it. That way if a driver is present
* but disabled, or is otherwise not able to route the interrupts, the
* system can fall back to a functional state. That will require a more
* substantial change though, including having the IOMMU initialize
* earlier.
*/
if (intr_apic_id_limit == -1)
intr_apic_id_limit = xAPIC_MAX_APIC_ID;
/* For families 10h and newer. */
pkg_id_shift = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
AMDID_COREID_SIZE_SHIFT;
/* For 0Fh family. */
if (pkg_id_shift == 0)
pkg_id_shift =
mask_width((cpu_procinfo2 & AMDID_CMP_CORES) + 1);
/*
* Families prior to 16h define the following value as
* cores per compute unit and we don't really care about the AMD
* compute units at the moment. Perhaps we should treat them as
* cores and cores within the compute units as hardware threads,
* but that's up for debate.
* Later families define the value as threads per compute unit,
* so we are following AMD's nomenclature here.
*/
if ((amd_feature2 & AMDID2_TOPOLOGY) != 0 &&
CPUID_TO_FAMILY(cpu_id) >= 0x16) {
cpuid_count(0x8000001e, 0, p);
share_count = ((p[1] >> 8) & 0xff) + 1;
core_id_shift = mask_width(share_count);
/*
* For Zen (17h), gather Nodes per Processor. Each node is a
* Zeppelin die; TR and EPYC CPUs will have multiple dies per
* package. Communication latency between dies is higher than
* within them.
*/
nodes_per_socket = ((p[2] >> 8) & 0x7) + 1;
node_id_shift = pkg_id_shift - mask_width(nodes_per_socket);
}
if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) {
for (i = 0; ; i++) {
cpuid_count(0x8000001d, i, p);
type = p[0] & 0x1f;
level = (p[0] >> 5) & 0x7;
share_count = 1 + ((p[0] >> 14) & 0xfff);
if (!add_deterministic_cache(type, level, share_count))
break;
}
} else {
if (cpu_exthigh >= 0x80000005) {
cpuid_count(0x80000005, 0, p);
if (((p[2] >> 24) & 0xff) != 0) {
caches[0].id_shift = 0;
caches[0].present = 1;
}
}
if (cpu_exthigh >= 0x80000006) {
cpuid_count(0x80000006, 0, p);
if (((p[2] >> 16) & 0xffff) != 0) {
caches[1].id_shift = 0;
caches[1].present = 1;
}
if (((p[3] >> 18) & 0x3fff) != 0) {
nodes_per_socket = 1;
if ((amd_feature2 & AMDID2_NODE_ID) != 0) {
/*
* Handle multi-node processors that
* have multiple chips, each with its
* own L3 cache, on the same die.
*/
v = rdmsr(0xc001100c);
nodes_per_socket = 1 + ((v >> 3) & 0x7);
}
caches[2].id_shift =
pkg_id_shift - mask_width(nodes_per_socket);
caches[2].present = 1;
}
}
}
}
/*
* Determine topology of processing units for Intel CPUs
* using CPUID Leaf 1 and Leaf 4, if supported.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
* - Intel 64 and IA-32 Architectures Software Developer’s Manual,
* Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS
* FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS
*/
static void
topo_probe_intel_0x4(void)
{
u_int p[4];
int max_cores;
int max_logical;
/* Both zero and one here mean one logical processor per package. */
max_logical = (cpu_feature & CPUID_HTT) != 0 ?
(cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
if (max_logical <= 1)
return;
if (cpu_high >= 0x4) {
cpuid_count(0x04, 0, p);
max_cores = ((p[0] >> 26) & 0x3f) + 1;
} else
max_cores = 1;
core_id_shift = mask_width(max_logical/max_cores);
KASSERT(core_id_shift >= 0,
("intel topo: max_cores > max_logical\n"));
pkg_id_shift = core_id_shift + mask_width(max_cores);
}
/*
* Determine topology of processing units for Intel CPUs
* using CPUID Leaf 1Fh or 0Bh, if supported.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
* - Intel 64 and IA-32 Architectures Software Developer’s Manual,
* Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS
* FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS
*/
static void
topo_probe_intel_0xb(void)
{
u_int leaf;
u_int p[4] = { 0 };
int bits;
int type;
int i;
/* Prefer leaf 1Fh (V2 Extended Topology Enumeration). */
if (cpu_high >= 0x1f) {
leaf = 0x1f;
cpuid_count(leaf, 0, p);
}
/* Fall back to leaf 0Bh (Extended Topology Enumeration). */
if (p[1] == 0) {
leaf = 0x0b;
cpuid_count(leaf, 0, p);
}
/* Fall back to leaf 04h (Deterministic Cache Parameters). */
if (p[1] == 0) {
topo_probe_intel_0x4();
return;
}
/* We only support three levels for now. */
for (i = 0; ; i++) {
cpuid_count(leaf, i, p);
bits = p[0] & 0x1f;
type = (p[2] >> 8) & 0xff;
if (type == 0)
break;
if (type == CPUID_TYPE_SMT)
core_id_shift = bits;
else if (type == CPUID_TYPE_CORE)
pkg_id_shift = bits;
else if (bootverbose)
printf("Topology level type %d shift: %d\n", type, bits);
}
if (pkg_id_shift < core_id_shift) {
printf("WARNING: core covers more APIC IDs than a package\n");
core_id_shift = pkg_id_shift;
}
}
/*
* Determine topology of caches for Intel CPUs.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
* - Intel 64 and IA-32 Architectures Software Developer’s Manual
* Volume 2A: Instruction Set Reference, A-M,
* CPUID instruction
*/
static void
topo_probe_intel_caches(void)
{
u_int p[4];
int level;
int share_count;
int type;
int i;
if (cpu_high < 0x4) {
/*
* Available cache level and sizes can be determined
* via CPUID leaf 2, but that requires a huge table of hardcoded
* values, so for now just assume L1 and L2 caches potentially
* shared only by HTT processing units, if HTT is present.
*/
caches[0].id_shift = pkg_id_shift;
caches[0].present = 1;
caches[1].id_shift = pkg_id_shift;
caches[1].present = 1;
return;
}
for (i = 0; ; i++) {
cpuid_count(0x4, i, p);
type = p[0] & 0x1f;
level = (p[0] >> 5) & 0x7;
share_count = 1 + ((p[0] >> 14) & 0xfff);
if (!add_deterministic_cache(type, level, share_count))
break;
}
}
/*
* Determine topology of processing units and caches for Intel CPUs.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
*/
static void
topo_probe_intel(void)
{
/*
* Note that 0x1 <= cpu_high < 4 case should be
* compatible with topo_probe_intel_0x4() logic when
* CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
* or it should trigger the fallback otherwise.
*/
if (cpu_high >= 0xb)
topo_probe_intel_0xb();
else if (cpu_high >= 0x1)
topo_probe_intel_0x4();
topo_probe_intel_caches();
}
/*
* Topology information is queried only on BSP, on which this
* code runs and for which it can query CPUID information.
* Then topology is extrapolated on all packages using an
* assumption that APIC ID to hardware component ID mapping is
* homogeneous.
* That doesn't necessarily imply that the topology is uniform.
*/
void
topo_probe(void)
{
static int cpu_topo_probed = 0;
struct x86_topo_layer {
int type;
int subtype;
int id_shift;
} topo_layers[MAX_CACHE_LEVELS + 5];
struct topo_node *parent;
struct topo_node *node;
int layer;
int nlayers;
int node_id;
int i;
#if defined(DEV_ACPI) && MAXMEMDOM > 1
int d, domain;
#endif
if (cpu_topo_probed)
return;
CPU_ZERO(&logical_cpus_mask);
if (mp_ncpus <= 1)
; /* nothing */
else if (cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON)
topo_probe_amd();
else if (cpu_vendor_id == CPU_VENDOR_INTEL)
topo_probe_intel();
KASSERT(pkg_id_shift >= core_id_shift,
("bug in APIC topology discovery"));
nlayers = 0;
bzero(topo_layers, sizeof(topo_layers));
topo_layers[nlayers].type = TOPO_TYPE_PKG;
topo_layers[nlayers].id_shift = pkg_id_shift;
if (bootverbose)
printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift);
nlayers++;
if (pkg_id_shift > node_id_shift && node_id_shift != 0) {
topo_layers[nlayers].type = TOPO_TYPE_GROUP;
topo_layers[nlayers].id_shift = node_id_shift;
if (bootverbose)
printf("Node ID shift: %u\n",
topo_layers[nlayers].id_shift);
nlayers++;
}
/*
* Consider all caches to be within a package/chip
* and "in front" of all sub-components like
* cores and hardware threads.
*/
for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) {
if (caches[i].present) {
if (node_id_shift != 0)
KASSERT(caches[i].id_shift <= node_id_shift,
("bug in APIC topology discovery"));
KASSERT(caches[i].id_shift <= pkg_id_shift,
("bug in APIC topology discovery"));
KASSERT(caches[i].id_shift >= core_id_shift,
("bug in APIC topology discovery"));
topo_layers[nlayers].type = TOPO_TYPE_CACHE;
topo_layers[nlayers].subtype = i + 1;
topo_layers[nlayers].id_shift = caches[i].id_shift;
if (bootverbose)
printf("L%u cache ID shift: %u\n",
topo_layers[nlayers].subtype,
topo_layers[nlayers].id_shift);
nlayers++;
}
}
if (pkg_id_shift > core_id_shift) {
topo_layers[nlayers].type = TOPO_TYPE_CORE;
topo_layers[nlayers].id_shift = core_id_shift;
if (bootverbose)
printf("Core ID shift: %u\n",
topo_layers[nlayers].id_shift);
nlayers++;
}
topo_layers[nlayers].type = TOPO_TYPE_PU;
topo_layers[nlayers].id_shift = 0;
nlayers++;
#if defined(DEV_ACPI) && MAXMEMDOM > 1
if (vm_ndomains > 1) {
for (layer = 0; layer < nlayers; ++layer) {
for (i = 0; i <= max_apic_id; ++i) {
if ((i & ((1 << topo_layers[layer].id_shift) - 1)) == 0)
domain = -1;
if (!cpu_info[i].cpu_present)
continue;
d = acpi_pxm_get_cpu_locality(i);
if (domain >= 0 && domain != d)
break;
domain = d;
}
if (i > max_apic_id)
break;
}
KASSERT(layer < nlayers, ("NUMA domain smaller than PU"));
memmove(&topo_layers[layer+1], &topo_layers[layer],
sizeof(*topo_layers) * (nlayers - layer));
topo_layers[layer].type = TOPO_TYPE_NODE;
topo_layers[layer].subtype = CG_SHARE_NONE;
nlayers++;
}
#endif
topo_init_root(&topo_root);
for (i = 0; i <= max_apic_id; ++i) {
if (!cpu_info[i].cpu_present)
continue;
parent = &topo_root;
for (layer = 0; layer < nlayers; ++layer) {
#if defined(DEV_ACPI) && MAXMEMDOM > 1
if (topo_layers[layer].type == TOPO_TYPE_NODE) {
node_id = acpi_pxm_get_cpu_locality(i);
} else
#endif
node_id = i >> topo_layers[layer].id_shift;
parent = topo_add_node_by_hwid(parent, node_id,
topo_layers[layer].type,
topo_layers[layer].subtype);
}
}
parent = &topo_root;
for (layer = 0; layer < nlayers; ++layer) {
#if defined(DEV_ACPI) && MAXMEMDOM > 1
if (topo_layers[layer].type == TOPO_TYPE_NODE)
node_id = acpi_pxm_get_cpu_locality(boot_cpu_id);
else
#endif
node_id = boot_cpu_id >> topo_layers[layer].id_shift;
node = topo_find_node_by_hwid(parent, node_id,
topo_layers[layer].type,
topo_layers[layer].subtype);
topo_promote_child(node);
parent = node;
}
cpu_topo_probed = 1;
}
/*
* Assign logical CPU IDs to local APICs.
*/
void
assign_cpu_ids(void)
{
struct topo_node *node;
u_int smt_mask;
int nhyper;
smt_mask = (1u << core_id_shift) - 1;
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
*/
mp_ncpus = 0;
nhyper = 0;
TOPO_FOREACH(node, &topo_root) {
if (node->type != TOPO_TYPE_PU)
continue;
if ((node->hwid & smt_mask) != (boot_cpu_id & smt_mask))
cpu_info[node->hwid].cpu_hyperthread = 1;
if (resource_disabled("lapic", node->hwid)) {
if (node->hwid != boot_cpu_id)
cpu_info[node->hwid].cpu_disabled = 1;
else
printf("Cannot disable BSP, APIC ID = %d\n",
node->hwid);
}
if (!hyperthreading_allowed &&
cpu_info[node->hwid].cpu_hyperthread)
cpu_info[node->hwid].cpu_disabled = 1;
if (mp_ncpus >= MAXCPU)
cpu_info[node->hwid].cpu_disabled = 1;
if (cpu_info[node->hwid].cpu_disabled) {
disabled_cpus++;
continue;
}
if (cpu_info[node->hwid].cpu_hyperthread)
nhyper++;
cpu_apic_ids[mp_ncpus] = node->hwid;
apic_cpuids[node->hwid] = mp_ncpus;
topo_set_pu_id(node, mp_ncpus);
mp_ncpus++;
}
KASSERT(mp_maxid >= mp_ncpus - 1,
("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
mp_ncpus));
mp_ncores = mp_ncpus - nhyper;
smp_threads_per_core = mp_ncpus / mp_ncores;
}
/*
* Print various information about the SMP system hardware and setup.
*/
void
cpu_mp_announce(void)
{
struct topo_node *node;
const char *hyperthread;
struct topo_analysis topology;
printf("FreeBSD/SMP: ");
if (topo_analyze(&topo_root, 1, &topology)) {
printf("%d package(s)", topology.entities[TOPO_LEVEL_PKG]);
if (topology.entities[TOPO_LEVEL_GROUP] > 1)
printf(" x %d groups",
topology.entities[TOPO_LEVEL_GROUP]);
if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1)
printf(" x %d cache groups",
topology.entities[TOPO_LEVEL_CACHEGROUP]);
if (topology.entities[TOPO_LEVEL_CORE] > 0)
printf(" x %d core(s)",
topology.entities[TOPO_LEVEL_CORE]);
if (topology.entities[TOPO_LEVEL_THREAD] > 1)
printf(" x %d hardware threads",
topology.entities[TOPO_LEVEL_THREAD]);
} else {
printf("Non-uniform topology");
}
printf("\n");
if (disabled_cpus) {
printf("FreeBSD/SMP Online: ");
if (topo_analyze(&topo_root, 0, &topology)) {
printf("%d package(s)",
topology.entities[TOPO_LEVEL_PKG]);
if (topology.entities[TOPO_LEVEL_GROUP] > 1)
printf(" x %d groups",
topology.entities[TOPO_LEVEL_GROUP]);
if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1)
printf(" x %d cache groups",
topology.entities[TOPO_LEVEL_CACHEGROUP]);
if (topology.entities[TOPO_LEVEL_CORE] > 0)
printf(" x %d core(s)",
topology.entities[TOPO_LEVEL_CORE]);
if (topology.entities[TOPO_LEVEL_THREAD] > 1)
printf(" x %d hardware threads",
topology.entities[TOPO_LEVEL_THREAD]);
} else {
printf("Non-uniform topology");
}
printf("\n");
}
if (!bootverbose)
return;
TOPO_FOREACH(node, &topo_root) {
switch (node->type) {
case TOPO_TYPE_PKG:
printf("Package HW ID = %u\n", node->hwid);
break;
case TOPO_TYPE_CORE:
printf("\tCore HW ID = %u\n", node->hwid);
break;
case TOPO_TYPE_PU:
if (cpu_info[node->hwid].cpu_hyperthread)
hyperthread = "/HT";
else
hyperthread = "";
if (node->subtype == 0)
printf("\t\tCPU (AP%s): APIC ID: %u"
"(disabled)\n", hyperthread, node->hwid);
else if (node->id == 0)
printf("\t\tCPU0 (BSP): APIC ID: %u\n",
node->hwid);
else
printf("\t\tCPU%u (AP%s): APIC ID: %u\n",
node->id, hyperthread, node->hwid);
break;
default:
/* ignored */
break;
}
}
}
/*
* Add a scheduling group, a group of logical processors sharing
* a particular cache (and thus having an affinity), to the scheduling
* topology.
* This function recursively works on lower level caches.
*/
static void
x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root)
{
struct topo_node *node;
int nchildren;
int ncores;
int i;
KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE ||
root->type == TOPO_TYPE_NODE || root->type == TOPO_TYPE_GROUP,
("x86topo_add_sched_group: bad type: %u", root->type));
CPU_COPY(&root->cpuset, &cg_root->cg_mask);
cg_root->cg_count = root->cpu_count;
if (root->type == TOPO_TYPE_CACHE)
cg_root->cg_level = root->subtype;
else
cg_root->cg_level = CG_SHARE_NONE;
if (root->type == TOPO_TYPE_NODE)
cg_root->cg_flags = CG_FLAG_NODE;
else
cg_root->cg_flags = 0;
+#if defined(CPUGRP_SCORE)
+ /*
+ * Set default performance/efficiency score.
+ */
+ memset(cg_root->cg_score, CG_SCORE_DEFAULT, sizeof(cg_root->cg_score));
+#endif
+
/*
* Check how many core nodes we have under the given root node.
* If we have multiple logical processors, but not multiple
* cores, then those processors must be hardware threads.
*/
ncores = 0;
node = root;
while (node != NULL) {
if (node->type != TOPO_TYPE_CORE) {
node = topo_next_node(root, node);
continue;
}
ncores++;
node = topo_next_nonchild_node(root, node);
}
if (cg_root->cg_level != CG_SHARE_NONE &&
root->cpu_count > 1 && ncores < 2)
cg_root->cg_flags |= CG_FLAG_SMT;
/*
* Find out how many cache nodes we have under the given root node.
* We ignore cache nodes that cover all the same processors as the
* root node. Also, we do not descend below found cache nodes.
* That is, we count top-level "non-redundant" caches under the root
* node.
*/
nchildren = 0;
node = root;
while (node != NULL) {
/*
* When some APICs are disabled by tunables, nodes can end up
* with an empty cpuset. Nodes with an empty cpuset will be
* translated into cpu groups with empty cpusets. smp_topo_fill
* will then set cg_first and cg_last to -1. This isn't
* correctly handled in all functions. E.g. when
* cpu_search_lowest and cpu_search_highest loop through all
* cpus, they call CPU_ISSET on cpu -1 which ends up in a
* general protection fault.
*
* We could fix the scheduler to handle empty cpu groups
* correctly. Nevertheless, empty cpu groups are causing
* overhead for no value. So, it makes more sense to simply not
* create them.
*/
if (CPU_EMPTY(&node->cpuset)) {
node = topo_next_node(root, node);
continue;
}
if (CPU_CMP(&node->cpuset, &root->cpuset) == 0) {
if (node->type == TOPO_TYPE_CACHE &&
cg_root->cg_level < node->subtype)
cg_root->cg_level = node->subtype;
if (node->type == TOPO_TYPE_NODE)
cg_root->cg_flags |= CG_FLAG_NODE;
node = topo_next_node(root, node);
continue;
}
if (node->type != TOPO_TYPE_GROUP &&
node->type != TOPO_TYPE_NODE &&
node->type != TOPO_TYPE_CACHE) {
node = topo_next_node(root, node);
continue;
}
nchildren++;
node = topo_next_nonchild_node(root, node);
}
/*
* We are not interested in nodes including only one CPU each.
*/
if (nchildren == root->cpu_count)
return;
/*
* We are not interested in nodes without children.
*/
cg_root->cg_children = nchildren;
if (nchildren == 0)
return;
cg_root->cg_child = smp_topo_alloc(nchildren);
/*
* Now find again the same cache nodes as above and recursively
* build scheduling topologies for them.
*/
node = root;
i = 0;
while (node != NULL) {
if ((node->type != TOPO_TYPE_GROUP &&
node->type != TOPO_TYPE_NODE &&
node->type != TOPO_TYPE_CACHE) ||
CPU_CMP(&node->cpuset, &root->cpuset) == 0 ||
CPU_EMPTY(&node->cpuset)) {
node = topo_next_node(root, node);
continue;
}
cg_root->cg_child[i].cg_parent = cg_root;
x86topo_add_sched_group(node, &cg_root->cg_child[i]);
i++;
node = topo_next_nonchild_node(root, node);
}
}
/*
* Build the MI scheduling topology from the discovered hardware topology.
*/
struct cpu_group *
cpu_topo(void)
{
struct cpu_group *cg_root;
if (mp_ncpus <= 1)
return (smp_topo_none());
cg_root = smp_topo_alloc(1);
x86topo_add_sched_group(&topo_root, cg_root);
return (cg_root);
}
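/*
 * Illustrative sketch (editor's addition, not part of the patch): with
 * CPUGRP_SCORE enabled, a consumer could look up a scheduling group
 * containing a given CPU via smp_topo_find() and read the score that
 * x86topo_add_sched_group() initialized above. The helper name is
 * hypothetical and the class/capability indices are assumed to be within
 * the bounds defined in sys/smp.h.
 */
#if defined(CPUGRP_SCORE)
static uint8_t
example_cpu_group_score(int cpu, int class_id, int capability)
{
	struct cpu_group *cg;

	cg = smp_topo_find(smp_topo(), cpu);
	if (cg == NULL)
		return (CG_SCORE_DEFAULT);
	return (cg->cg_score[class_id][capability]);
}
#endif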
static void
cpu_alloc(void *dummy __unused)
{
/*
* Dynamically allocate the arrays that depend on the
* maximum APIC ID.
*/
cpu_info = malloc(sizeof(*cpu_info) * (max_apic_id + 1), M_CPUS,
M_WAITOK | M_ZERO);
apic_cpuids = malloc(sizeof(*apic_cpuids) * (max_apic_id + 1), M_CPUS,
M_WAITOK | M_ZERO);
}
SYSINIT(cpu_alloc, SI_SUB_CPU, SI_ORDER_FIRST, cpu_alloc, NULL);
/*
* Add a logical CPU to the topology.
*/
void
cpu_add(u_int apic_id, char boot_cpu)
{
if (apic_id > max_apic_id)
panic("SMP: APIC ID %d too high", apic_id);
KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %u added twice",
apic_id));
cpu_info[apic_id].cpu_present = 1;
if (boot_cpu) {
KASSERT(boot_cpu_id == -1,
("CPU %u claims to be BSP, but CPU %u already is", apic_id,
boot_cpu_id));
boot_cpu_id = apic_id;
cpu_info[apic_id].cpu_bsp = 1;
}
if (bootverbose)
printf("SMP: Added CPU %u (%s)\n", apic_id, boot_cpu ? "BSP" :
"AP");
}
void
cpu_mp_setmaxid(void)
{
/*
* mp_ncpus and mp_maxid should be already set by calls to cpu_add().
* If there were no calls to cpu_add() assume this is a UP system.
*/
if (mp_ncpus == 0)
mp_ncpus = 1;
}
int
cpu_mp_probe(void)
{
/*
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
CPU_SETOF(0, &all_cpus);
return (mp_ncpus > 1);
}
/*
* AP CPUs call this to initialize themselves.
*/
void
init_secondary_tail(void)
{
u_int cpuid;
pmap_activate_boot(vmspace_pmap(proc0.p_vmspace));
/*
* On real hardware, switch to x2apic mode if possible. Do it
* after aps_ready was signalled, to avoid manipulating the
* mode while BSP might still want to send some IPI to us
* (second startup IPI is ignored on modern hardware etc).
*/
lapic_xapic_mode();
/* Initialize the PAT MSR. */
pmap_init_pat();
/* set up CPU registers and state */
cpu_setregs();
/* set up SSE/NX */
initializecpu();
/* set up FPU state on the AP */
#ifdef __amd64__
fpuinit();
#else
npxinit(false);
#endif
if (cpu_ops.cpu_init)
cpu_ops.cpu_init();
/* A quick check from sanity claus */
cpuid = PCPU_GET(cpuid);
if (PCPU_GET(apic_id) != lapic_id()) {
printf("SMP: cpuid = %d\n", cpuid);
printf("SMP: actual apic_id = %d\n", lapic_id());
printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
panic("cpuid mismatch! boom!!");
}
/* Initialize curthread. */
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
PCPU_SET(curthread, PCPU_GET(idlethread));
schedinit_ap();
mtx_lock_spin(&ap_boot_mtx);
mca_init();
/* Init local apic for irq's */
lapic_setup(1);
/* Set memory range attributes for this CPU to match the BSP */
mem_range_AP_init();
smp_cpus++;
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
if (bootverbose)
printf("SMP: AP CPU #%d Launched!\n", cpuid);
else
printf("%s%d%s", smp_cpus == 2 ? "Launching APs: " : "",
cpuid, smp_cpus == mp_ncpus ? "\n" : " ");
/* Determine if we are a logical CPU. */
if (cpu_info[PCPU_GET(apic_id)].cpu_hyperthread)
CPU_SET(cpuid, &logical_cpus_mask);
if (bootverbose)
lapic_dump("AP");
if (smp_cpus == mp_ncpus) {
/* enable IPI's, tlb shootdown, freezes etc */
atomic_store_rel_int(&smp_started, 1);
}
#ifdef __amd64__
if (pmap_pcid_enabled)
load_cr4(rcr4() | CR4_PCIDE);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_ufssel);
#endif
mtx_unlock_spin(&ap_boot_mtx);
/* Wait until all the APs are up. */
while (atomic_load_acq_int(&smp_started) == 0)
ia32_pause();
kcsan_cpu_init(cpuid);
sched_ap_entry();
panic("scheduler returned us to %s", __func__);
/* NOTREACHED */
}
static void
smp_after_idle_runnable(void *arg __unused)
{
int cpu;
if (mp_ncpus == 1)
return;
KASSERT(smp_started != 0, ("%s: SMP not started yet", __func__));
/*
* Wait for all APs to handle an interrupt. After that, we know that
* the APs have entered the scheduler at least once, so the boot stacks
* are safe to free.
*/
smp_rendezvous(smp_no_rendezvous_barrier, NULL,
smp_no_rendezvous_barrier, NULL);
for (cpu = 1; cpu < mp_ncpus; cpu++) {
kmem_free(bootstacks[cpu], kstack_pages * PAGE_SIZE);
}
}
SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
smp_after_idle_runnable, NULL);
/*
* We tell the I/O APIC code about all the CPUs we want to receive
* interrupts. If we don't want certain CPUs to receive IRQs we
* can simply not tell the I/O APIC code about them in this function.
* We also do not tell it about the BSP since it tells itself about
* the BSP internally to work with UP kernels and on UP machines.
*/
void
set_interrupt_apic_ids(void)
{
u_int i, apic_id;
for (i = 0; i < MAXCPU; i++) {
apic_id = cpu_apic_ids[i];
if (apic_id == -1)
continue;
if (cpu_info[apic_id].cpu_bsp)
continue;
if (cpu_info[apic_id].cpu_disabled)
continue;
if (intr_apic_id_limit >= 0 && apic_id > intr_apic_id_limit)
continue;
/* Don't let hyperthreads service interrupts. */
if (cpu_info[apic_id].cpu_hyperthread &&
!hyperthreading_intr_allowed)
continue;
intr_add_cpu(i);
}
}
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
#endif /* COUNT_XINVLTLB_HITS */
/*
* Init and startup IPI.
*/
void
ipi_startup(int apic_id, int vector)
{
/*
* This attempts to follow the algorithm described in the
* Intel Multiprocessor Specification v1.4 in section B.4.
* For each IPI, we allow the local APIC ~20us to deliver the
* IPI. If that times out, we panic.
*/
/*
* first we do an INIT IPI: this INIT IPI might be run, resetting
* and running the target CPU. OR this INIT IPI might be latched (P5
* bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
* ignored.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
lapic_ipi_wait(100);
/* Explicitly deassert the INIT IPI. */
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
apic_id);
DELAY(10000); /* wait ~10mS */
/*
* next we do a STARTUP IPI: the previous INIT IPI might still be
* latched, (P5 bug) this 1st STARTUP would then terminate
* immediately, and the previously started INIT IPI would continue. OR
* the previous INIT IPI has already run. and this STARTUP IPI will
* run. OR the previous INIT IPI was ignored. and this STARTUP IPI
* will run.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
if (!lapic_ipi_wait(100))
panic("Failed to deliver first STARTUP IPI to APIC %d",
apic_id);
DELAY(200); /* wait ~200uS */
/*
* finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
* the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
* this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
* recognized after hardware RESET or INIT IPI.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
if (!lapic_ipi_wait(100))
panic("Failed to deliver second STARTUP IPI to APIC %d",
apic_id);
DELAY(200); /* wait ~200uS */
}
static bool
ipi_bitmap_set(int cpu, u_int ipi)
{
u_int bitmap, old, new;
u_int *cpu_bitmap;
bitmap = 1 << ipi;
cpu_bitmap = &cpuid_to_pcpu[cpu]->pc_ipi_bitmap;
old = *cpu_bitmap;
for (;;) {
if ((old & bitmap) != 0)
break;
new = old | bitmap;
if (atomic_fcmpset_int(cpu_bitmap, &old, new))
break;
}
return (old != 0);
}
/*
* Send an IPI to specified CPU handling the bitmap logic.
*/
static void
ipi_send_cpu(int cpu, u_int ipi)
{
KASSERT((u_int)cpu < MAXCPU && cpu_apic_ids[cpu] != -1,
("IPI to non-existent CPU %d", cpu));
if (IPI_IS_BITMAPED(ipi)) {
if (ipi_bitmap_set(cpu, ipi))
return;
ipi = IPI_BITMAP_VECTOR;
}
lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
}
void
ipi_bitmap_handler(struct trapframe frame)
{
struct trapframe *oldframe;
struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
kasan_mark(&frame, sizeof(frame), sizeof(frame), 0);
td = curthread;
ipi_bitmap = atomic_readandclear_int(&cpuid_to_pcpu[cpu]->
pc_ipi_bitmap);
/*
* sched_preempt() must be called to clear the pending preempt
* IPI to enable delivery of further preempts. However, the
* critical section will cause extra scheduler lock thrashing
* when used unconditionally. Only critical_enter() if
* hardclock must also run, which requires the section entry.
*/
if (ipi_bitmap & (1 << IPI_HARDCLOCK))
critical_enter();
td->td_intr_nesting_level++;
oldframe = td->td_intr_frame;
td->td_intr_frame = &frame;
#if defined(STACK) || defined(DDB)
if (ipi_bitmap & (1 << IPI_TRACE))
stack_capture_intr();
#endif
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
(*ipi_ast_counts[cpu])++;
#endif
/* Nothing to do for AST */
}
if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
hardclockintr();
}
td->td_intr_frame = oldframe;
td->td_intr_nesting_level--;
if (ipi_bitmap & (1 << IPI_HARDCLOCK))
critical_exit();
}
/*
* send an IPI to a set of cpus.
*/
void
ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &cpus);
CPU_FOREACH_ISSET(cpu, &cpus) {
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
}
/*
* send an IPI to a specific CPU.
*/
void
ipi_cpu(int cpu, u_int ipi)
{
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_SET_ATOMIC(cpu, &ipi_stop_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
/*
* send an IPI to all CPUs EXCEPT myself
*/
void
ipi_all_but_self(u_int ipi)
{
cpuset_t other_cpus;
int cpu, c;
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD) {
other_cpus = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &other_cpus);
CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus);
}
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
if (IPI_IS_BITMAPED(ipi)) {
cpu = PCPU_GET(cpuid);
CPU_FOREACH(c) {
if (c != cpu)
ipi_bitmap_set(c, ipi);
}
ipi = IPI_BITMAP_VECTOR;
}
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
void
ipi_self_from_nmi(u_int vector)
{
lapic_ipi_vectored(vector, APIC_IPI_DEST_SELF);
/* Wait for IPI to finish. */
if (!lapic_ipi_wait(50000)) {
if (KERNEL_PANICKED())
return;
else
panic("APIC: IPI is stuck");
}
}
int
ipi_nmi_handler(void)
{
u_int cpuid;
/*
* As long as there is no simple way to know about an NMI's
* source, if the bitmask for the current CPU is present in
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &ipi_stop_nmi_pending))
return (1);
CPU_CLR_ATOMIC(cpuid, &ipi_stop_nmi_pending);
cpustop_handler();
return (0);
}
int nmi_kdb_lock;
void
nmi_call_kdb_smp(u_int type, struct trapframe *frame)
{
int cpu;
bool call_post;
cpu = PCPU_GET(cpuid);
if (atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
nmi_call_kdb(cpu, type, frame);
call_post = false;
} else {
savectx(&stoppcbs[cpu]);
CPU_SET_ATOMIC(cpu, &stopped_cpus);
while (!atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1))
ia32_pause();
call_post = true;
}
atomic_store_rel_int(&nmi_kdb_lock, 0);
if (call_post)
cpustop_handler_post(cpu);
}
/*
* Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
* if available) until we are resumed.
*/
void
cpustop_handler(void)
{
struct monitorbuf *mb;
u_int cpu;
bool use_mwait;
cpu = PCPU_GET(cpuid);
savectx(&stoppcbs[cpu]);
use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 &&
!mwait_cpustop_broken);
if (use_mwait) {
mb = PCPU_PTR(monitorbuf);
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_STOPPED);
}
/* Indicate that we are stopped */
CPU_SET_ATOMIC(cpu, &stopped_cpus);
/* Wait for restart */
while (!CPU_ISSET(cpu, &started_cpus)) {
if (use_mwait) {
cpu_monitor(mb, 0, 0);
if (atomic_load_int(&mb->stop_state) ==
MONITOR_STOPSTATE_STOPPED)
cpu_mwait(0, MWAIT_C1);
continue;
}
ia32_pause();
/*
* Halt non-BSP CPUs on panic -- we're never going to need them
* again, and might as well save power / release resources
* (e.g., overprovisioned VM infrastructure).
*/
while (__predict_false(!IS_BSP() && KERNEL_PANICKED()))
halt();
}
cpustop_handler_post(cpu);
}
static void
cpustop_handler_post(u_int cpu)
{
CPU_CLR_ATOMIC(cpu, &started_cpus);
CPU_CLR_ATOMIC(cpu, &stopped_cpus);
/*
* We don't broadcast TLB invalidations to other CPUs when they are
* stopped. Hence, we clear the TLB before resuming.
*/
invltlb_glob();
#if defined(__amd64__) && (defined(DDB) || defined(GDB))
amd64_db_resume_dbreg();
#endif
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
cpustop_restartfunc = NULL;
}
}
/*
* Handle an IPI_SUSPEND by saving our current context and spinning until we
* are resumed.
*/
void
cpususpend_handler(void)
{
u_int cpu;
mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
cpu = PCPU_GET(cpuid);
#ifdef XENHVM
/*
* Some Xen guest types (PVH) expose a very minimal set of ACPI tables,
* and for example have no support for SCI. That leads to the suspend
* stacks not being allocated, and hence when attempting to perform a
* Xen triggered suspension FreeBSD will hit a #PF. Avoid saving the
* CPU and FPU contexts if the stacks are not allocated, as the
* hypervisor will already take care of this. Note that we could even
* do this for Xen triggered suspensions on guests that have full ACPI
* support, but doing so would introduce extra complexity.
*/
if (susppcbs == NULL) {
KASSERT(vm_guest == VM_GUEST_XEN, ("Missing suspend stack"));
CPU_SET_ATOMIC(cpu, &suspended_cpus);
CPU_SET_ATOMIC(cpu, &resuming_cpus);
} else
#endif
if (savectx(&susppcbs[cpu]->sp_pcb)) {
#ifdef __amd64__
fpususpend(susppcbs[cpu]->sp_fpususpend);
#else
npxsuspend(susppcbs[cpu]->sp_fpususpend);
#endif
/*
* suspended_cpus is cleared shortly after each AP is restarted
* by a Startup IPI, so that the BSP can proceed to restarting
* the next AP.
*
* resuming_cpus gets cleared when the AP completes
* initialization after having been released by the BSP.
* resuming_cpus is probably not the best name for the
* variable, because it is actually a set of processors that
* haven't resumed yet and haven't necessarily started resuming.
*
* Note that suspended_cpus is meaningful only for ACPI suspend
* as it's not really used for Xen suspend since the APs are
* automatically restored to the running state and the correct
* context. For the same reason resumectx is never called in
* that case.
*/
CPU_SET_ATOMIC(cpu, &suspended_cpus);
CPU_SET_ATOMIC(cpu, &resuming_cpus);
/*
* Invalidate the cache after setting the global status bits.
* The last AP to set its bit may end up being an Owner of the
* corresponding cache line in MOESI protocol. The AP may be
* stopped before the cache line is written to the main memory.
*/
wbinvd();
} else {
#ifdef __amd64__
fpuresume(susppcbs[cpu]->sp_fpususpend);
#else
npxresume(susppcbs[cpu]->sp_fpususpend);
#endif
pmap_init_pat();
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
/* Indicate that we have restarted and restored the context. */
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/* Wait for resume directive */
while (!CPU_ISSET(cpu, &toresume_cpus))
ia32_pause();
/* Re-apply microcode updates. */
ucode_reload();
#ifdef __i386__
/* Finish removing the identity mapping of low memory for this AP. */
invltlb_glob();
#endif
if (cpu_ops.cpu_resume)
cpu_ops.cpu_resume();
#ifdef __amd64__
if (vmm_resume_p)
vmm_resume_p();
#endif
/* Resume MCA and local APIC */
lapic_xapic_mode();
mca_resume();
lapic_setup(0);
/* Indicate that we are resumed */
CPU_CLR_ATOMIC(cpu, &resuming_cpus);
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}
/*
* Handle an IPI_SWI by waking delayed SWI thread.
*/
void
ipi_swi_handler(struct trapframe frame)
{
intr_event_handle(clk_intr_event, &frame);
}
/*
* This is called once the rest of the system is up and running and we're
* ready to let the APs out of the pen.
*/
static void
release_aps(void *dummy __unused)
{
if (mp_ncpus == 1)
return;
atomic_store_rel_int(&aps_ready, 1);
while (smp_started == 0)
ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
#ifdef COUNT_IPIS
/*
* Setup interrupt counters for IPI handlers.
*/
static void
mp_ipi_intrcnt(void *dummy)
{
char buf[64];
int i;
CPU_FOREACH(i) {
snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
intrcnt_add(buf, &ipi_invltlb_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
intrcnt_add(buf, &ipi_invlrng_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
intrcnt_add(buf, &ipi_invlpg_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
intrcnt_add(buf, &ipi_invlcache_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
intrcnt_add(buf, &ipi_preempt_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:ast", i);
intrcnt_add(buf, &ipi_ast_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
intrcnt_add(buf, &ipi_rendezvous_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif
