
part3_add_score_variable_to_cpu_group_struct_fbsd15c.patch

Authored By: koinec_yahoo.co.jp, Mar 24 2024, 11:39 AM
Size: 117 KB
Referenced Files: None
Subscribers: None


diff --git a/sys/conf/options b/sys/conf/options
index 555484360a2b..e625fdca214d 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -1,1024 +1,1025 @@
#
# On the handling of kernel options
#
# All kernel options should be listed in NOTES, with suitable
# descriptions. Negative options (options that make some code not
# compile) should be commented out; LINT (generated from NOTES) should
# compile as much code as possible. Try to structure option-using
# code so that a single option only switches code on, or only switches
# code off, to make it possible to have a full compile-test. If
# necessary, you can check for COMPILING_LINT to get maximum code
# coverage.
#
# All new options shall also be listed in either "conf/options" or
# "conf/options.<machine>". Options that affect a single source-file
# <xxx>.[c|s] should be directed into "opt_<xxx>.h", while options
# that affect multiple files should either go in "opt_global.h" if
# this is a kernel-wide option (used just about everywhere), or in
# "opt_<option-name-in-lower-case>.h" if it affects only some files.
# Note that the effect of listing only an option without a
# header-file-name in conf/options (and cousins) is that the last
# convention is followed.
#
# This handling scheme is not yet fully implemented.
#
#
# Format of this file:
# Option name filename
#
# If filename is missing, the default is
# opt_<name-of-option-in-lower-case>.h
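# For context (commentary, not part of the patch): an option declared with a
# header name is consumed by the affected source file roughly as sketched
# below. FOO_DEBUG, opt_foo.h and FOO_DPRINTF are made-up names used only
# for illustration.
#
#   #include "opt_foo.h"        /* generated by config(8); defines FOO_DEBUG if enabled */
#
#   #ifdef FOO_DEBUG
#   #define FOO_DPRINTF(...)    printf("foo: " __VA_ARGS__)
#   #else
#   #define FOO_DPRINTF(...)    do { } while (0)
#   #endif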
AAC_DEBUG opt_aac.h
AACRAID_DEBUG opt_aacraid.h
AHC_ALLOW_MEMIO opt_aic7xxx.h
AHC_TMODE_ENABLE opt_aic7xxx.h
AHC_DUMP_EEPROM opt_aic7xxx.h
AHC_DEBUG opt_aic7xxx.h
AHC_DEBUG_OPTS opt_aic7xxx.h
AHC_REG_PRETTY_PRINT opt_aic7xxx.h
AHD_DEBUG opt_aic79xx.h
AHD_DEBUG_OPTS opt_aic79xx.h
AHD_TMODE_ENABLE opt_aic79xx.h
AHD_REG_PRETTY_PRINT opt_aic79xx.h
# Debugging options.
ALT_BREAK_TO_DEBUGGER opt_kdb.h
BREAK_TO_DEBUGGER opt_kdb.h
BUF_TRACKING opt_global.h
DDB
DDB_BUFR_SIZE opt_ddb.h
DDB_CAPTURE_DEFAULTBUFSIZE opt_ddb.h
DDB_CAPTURE_MAXBUFSIZE opt_ddb.h
DDB_CTF opt_ddb.h
DDB_NUMSYM opt_ddb.h
EARLY_PRINTF opt_global.h
FULL_BUF_TRACKING opt_global.h
GDB
KDB opt_global.h
KDB_TRACE opt_kdb.h
KDB_UNATTENDED opt_kdb.h
KLD_DEBUG opt_kld.h
NUM_CORE_FILES opt_global.h
QUEUE_MACRO_DEBUG_TRACE opt_global.h
QUEUE_MACRO_DEBUG_TRASH opt_global.h
SYSCTL_DEBUG opt_sysctl.h
TEXTDUMP_PREFERRED opt_ddb.h
TEXTDUMP_VERBOSE opt_ddb.h
TSLOG opt_global.h
TSLOG_PAGEZERO opt_global.h
TSLOGSIZE opt_global.h
# Miscellaneous options.
ALQ
ALTERA_SDCARD_FAST_SIM opt_altera_sdcard.h
ATSE_CFI_HACK opt_cfi.h
AUDIT opt_global.h
BOOTHOWTO opt_global.h
BOOTVERBOSE opt_global.h
CALLOUT_PROFILING
CAPABILITIES opt_capsicum.h
CAPABILITY_MODE opt_capsicum.h
CC_CDG opt_global.h
CC_CHD opt_global.h
CC_CUBIC opt_global.h
CC_DEFAULT opt_cc.h
CC_DCTCP opt_global.h
CC_HD opt_global.h
CC_HTCP opt_global.h
CC_NEWRENO opt_global.h
CC_VEGAS opt_global.h
COMPAT_43 opt_global.h
COMPAT_43TTY opt_global.h
COMPAT_FREEBSD4 opt_global.h
COMPAT_FREEBSD5 opt_global.h
COMPAT_FREEBSD6 opt_global.h
COMPAT_FREEBSD7 opt_global.h
COMPAT_FREEBSD9 opt_global.h
COMPAT_FREEBSD10 opt_global.h
COMPAT_FREEBSD11 opt_global.h
COMPAT_FREEBSD12 opt_global.h
COMPAT_FREEBSD13 opt_global.h
COMPAT_FREEBSD14 opt_global.h
COMPAT_LINUXKPI opt_dontuse.h
COMPILING_LINT opt_global.h
CY_PCI_FASTINTR
DEADLKRES opt_watchdog.h
EXPERIMENTAL opt_global.h
DIRECTIO
FILEMON opt_dontuse.h
FFCLOCK
FULL_PREEMPTION opt_sched.h
GZIO opt_gzio.h
IMGACT_BINMISC opt_dontuse.h
IPI_PREEMPTION opt_sched.h
GEOM_BDE opt_geom.h
GEOM_CACHE opt_geom.h
GEOM_CONCAT opt_geom.h
GEOM_ELI opt_geom.h
GEOM_GATE opt_geom.h
GEOM_JOURNAL opt_geom.h
GEOM_LABEL opt_geom.h
GEOM_LABEL_GPT opt_geom.h
GEOM_LINUX_LVM opt_geom.h
GEOM_MAP opt_geom.h
GEOM_MIRROR opt_geom.h
GEOM_MOUNTVER opt_geom.h
GEOM_MULTIPATH opt_geom.h
GEOM_NOP opt_geom.h
GEOM_PART_APM opt_geom.h
GEOM_PART_BSD opt_geom.h
GEOM_PART_BSD64 opt_geom.h
GEOM_PART_EBR opt_geom.h
GEOM_PART_GPT opt_geom.h
GEOM_PART_LDM opt_geom.h
GEOM_PART_MBR opt_geom.h
GEOM_RAID opt_geom.h
GEOM_RAID3 opt_geom.h
GEOM_SHSEC opt_geom.h
GEOM_STRIPE opt_geom.h
GEOM_UZIP opt_geom.h
GEOM_UZIP_DEBUG opt_geom.h
GEOM_VINUM opt_geom.h
GEOM_VIRSTOR opt_geom.h
GEOM_ZERO opt_geom.h
IFLIB opt_iflib.h
KDTRACE_HOOKS opt_global.h
KDTRACE_FRAME opt_kdtrace.h
KN_HASHSIZE opt_kqueue.h
KSTACK_MAX_PAGES
KSTACK_PAGES
KSTACK_USAGE_PROF
KTRACE
KTRACE_REQUEST_POOL opt_ktrace.h
LIBICONV
MAC opt_global.h
MAC_BIBA opt_dontuse.h
MAC_BSDEXTENDED opt_dontuse.h
MAC_DDB opt_dontuse.h
MAC_IFOFF opt_dontuse.h
MAC_IPACL opt_dontuse.h
MAC_LOMAC opt_dontuse.h
MAC_MLS opt_dontuse.h
MAC_NONE opt_dontuse.h
MAC_NTPD opt_dontuse.h
MAC_PARTITION opt_dontuse.h
MAC_PORTACL opt_dontuse.h
MAC_PRIORITY opt_dontuse.h
MAC_SEEOTHERUIDS opt_dontuse.h
MAC_STATIC opt_mac.h
MAC_STUB opt_dontuse.h
MAC_TEST opt_dontuse.h
MAC_GRANTBYLABEL opt_dontuse.h
MAC_VERIEXEC opt_dontuse.h
MAC_VERIEXEC_SHA1 opt_dontuse.h
MAC_VERIEXEC_SHA256 opt_dontuse.h
MAC_VERIEXEC_SHA384 opt_dontuse.h
MAC_VERIEXEC_SHA512 opt_dontuse.h
MD_ROOT opt_md.h
MD_ROOT_FSTYPE opt_md.h
MD_ROOT_READONLY opt_md.h
MD_ROOT_SIZE opt_md.h
MD_ROOT_MEM opt_md.h
MFI_DEBUG opt_mfi.h
MFI_DECODE_LOG opt_mfi.h
MPROF_BUFFERS opt_mprof.h
MPROF_HASH_SIZE opt_mprof.h
NEW_PCIB opt_global.h
NO_ADAPTIVE_MUTEXES opt_adaptive_mutexes.h
NO_ADAPTIVE_RWLOCKS
NO_ADAPTIVE_SX
NO_OBSOLETE_CODE opt_global.h
NO_SYSCTL_DESCR opt_global.h
NSWBUF_MIN opt_param.h
MBUF_PACKET_ZONE_DISABLE opt_global.h
PANIC_REBOOT_WAIT_TIME opt_panic.h
PCI_HP opt_pci.h
PCI_IOV opt_global.h
PPC_DEBUG opt_ppc.h
PPC_PROBE_CHIPSET opt_ppc.h
PPS_SYNC opt_ntp.h
PREEMPTION opt_sched.h
QUOTA
SCHED_4BSD opt_sched.h
SCHED_STATS opt_sched.h
SCHED_ULE opt_sched.h
SLEEPQUEUE_PROFILING
SLHCI_DEBUG opt_slhci.h
STACK opt_stack.h
SUIDDIR
MSGMNB opt_sysvipc.h
MSGMNI opt_sysvipc.h
MSGSEG opt_sysvipc.h
MSGSSZ opt_sysvipc.h
MSGTQL opt_sysvipc.h
SEMMNI opt_sysvipc.h
SEMMNS opt_sysvipc.h
SEMMNU opt_sysvipc.h
SEMMSL opt_sysvipc.h
SEMOPM opt_sysvipc.h
SEMUME opt_sysvipc.h
SHMALL opt_sysvipc.h
SHMMAX opt_sysvipc.h
SHMMAXPGS opt_sysvipc.h
SHMMIN opt_sysvipc.h
SHMMNI opt_sysvipc.h
SHMSEG opt_sysvipc.h
SYSVMSG opt_sysvipc.h
SYSVSEM opt_sysvipc.h
SYSVSHM opt_sysvipc.h
SW_WATCHDOG opt_watchdog.h
TCPHPTS
TCP_REQUEST_TRK opt_global.h
TCP_ACCOUNTING opt_global.h
TCP_BBR opt_inet.h
TCP_RACK opt_inet.h
#
# TCP SaD Detection is an experimental Sack attack Detection (SaD)
# algorithm that uses "normal" behaviour with SACK's to detect
# a possible attack. It is strictly experimental at this point.
#
TCP_SAD_DETECTION opt_inet.h
TURNSTILE_PROFILING
UMTX_PROFILING
UMTX_CHAINS opt_global.h
VERBOSE_SYSINIT
ZSTDIO opt_zstdio.h
# Sanitizers
COVERAGE opt_global.h
KASAN opt_global.h
KCOV
KCSAN opt_global.h
KMSAN opt_global.h
KUBSAN opt_global.h
# POSIX kernel options
P1003_1B_MQUEUE opt_posix.h
P1003_1B_SEMAPHORES opt_posix.h
_KPOSIX_PRIORITY_SCHEDULING opt_posix.h
# Do we want the config file compiled into the kernel?
INCLUDE_CONFIG_FILE opt_config.h
# Options for static filesystems. These should only be used at config
# time, since the corresponding lkms cannot work if there are any static
# dependencies. Unusability is enforced by hiding the defines for the
# options in a never-included header.
AUTOFS opt_dontuse.h
CD9660 opt_dontuse.h
EXT2FS opt_dontuse.h
FDESCFS opt_dontuse.h
FFS opt_dontuse.h
FUSEFS opt_dontuse.h
MSDOSFS opt_dontuse.h
NULLFS opt_dontuse.h
PROCFS opt_dontuse.h
PSEUDOFS opt_dontuse.h
SMBFS opt_dontuse.h
TARFS opt_dontuse.h
TMPFS opt_dontuse.h
UDF opt_dontuse.h
UNIONFS opt_dontuse.h
ZFS opt_dontuse.h
# Pseudofs debugging
PSEUDOFS_TRACE opt_pseudofs.h
# Tarfs debugging
TARFS_DEBUG opt_tarfs.h
# In-kernel GSS-API
KGSSAPI opt_kgssapi.h
KGSSAPI_DEBUG opt_kgssapi.h
# These static filesystems have one slightly bogus static dependency in
# sys/i386/i386/autoconf.c. If any of these filesystems are
# statically compiled into the kernel, code for mounting them as root
# filesystems will be enabled - but look below.
# NFSCL - client
# NFSD - server
NFSCL opt_nfs.h
NFSD opt_nfs.h
# filesystems and libiconv bridge
CD9660_ICONV opt_dontuse.h
MSDOSFS_ICONV opt_dontuse.h
UDF_ICONV opt_dontuse.h
# If you are following the conditions in the copyright,
# you can enable soft-updates which will speed up a lot of things
# and make the system safer from crashes at the same time.
# Otherwise a STUB module will be compiled in.
SOFTUPDATES opt_ffs.h
# On small, embedded systems, it can be useful to turn off support for
# snapshots. It saves about 30-40k for a feature that would be lightly
# used, if it is used at all.
NO_FFS_SNAPSHOT opt_ffs.h
# Enabling this option turns on support for Access Control Lists in UFS,
# which can be used to support high security configurations. Depends on
# UFS_EXTATTR.
UFS_ACL opt_ufs.h
# Enabling this option turns on support for extended attributes in UFS-based
# filesystems, which can be used to support high security configurations
# as well as new filesystem features.
UFS_EXTATTR opt_ufs.h
UFS_EXTATTR_AUTOSTART opt_ufs.h
# Enable fast hash lookups for large directories on UFS-based filesystems.
UFS_DIRHASH opt_ufs.h
# Enable gjournal-based UFS journal.
UFS_GJOURNAL opt_ufs.h
# We plan to remove the static dependencies above, with a
# <filesystem>_ROOT option to control if it is usable as root. This list
# allows these options to be present in config files already (though
# they won't make any difference yet).
NFS_ROOT opt_nfsroot.h
# SMB/CIFS requester
NETSMB opt_netsmb.h
# Enable debugnet(4) networking support.
DEBUGNET opt_global.h
# Enable netdump(4) client support.
NETDUMP opt_global.h
# Enable netgdb(4) support.
NETGDB opt_global.h
# Options used only in subr_param.c.
HZ opt_param.h
MAXFILES opt_param.h
NBUF opt_param.h
NSFBUFS opt_param.h
VM_BCACHE_SIZE_MAX opt_param.h
VM_SWZONE_SIZE_MAX opt_param.h
MAXUSERS
DFLDSIZ opt_param.h
MAXDSIZ opt_param.h
MAXSSIZ opt_param.h
# Generic SCSI options.
CAM_MAX_HIGHPOWER opt_cam.h
CAMDEBUG opt_cam.h
CAM_DEBUG_COMPILE opt_cam.h
CAM_DEBUG_DELAY opt_cam.h
CAM_DEBUG_BUS opt_cam.h
CAM_DEBUG_TARGET opt_cam.h
CAM_DEBUG_LUN opt_cam.h
CAM_DEBUG_FLAGS opt_cam.h
CAM_BOOT_DELAY opt_cam.h
CAM_IOSCHED_DYNAMIC opt_cam.h
CAM_IO_STATS opt_cam.h
CAM_TEST_FAILURE opt_cam.h
SCSI_DELAY opt_scsi.h
SCSI_NO_SENSE_STRINGS opt_scsi.h
SCSI_NO_OP_STRINGS opt_scsi.h
# Options used only in cam/ata/ata_da.c
ATA_STATIC_ID opt_ada.h
# Options used only in cam/scsi/scsi_cd.c
CHANGER_MIN_BUSY_SECONDS opt_cd.h
CHANGER_MAX_BUSY_SECONDS opt_cd.h
# Options used only in cam/scsi/scsi_da.c
DA_TRACK_REFS opt_da.h
# Options used only in cam/scsi/scsi_sa.c.
SA_IO_TIMEOUT opt_sa.h
SA_SPACE_TIMEOUT opt_sa.h
SA_REWIND_TIMEOUT opt_sa.h
SA_ERASE_TIMEOUT opt_sa.h
SA_1FM_AT_EOD opt_sa.h
# Options used only in cam/scsi/scsi_pt.c
SCSI_PT_DEFAULT_TIMEOUT opt_pt.h
# Options used only in cam/scsi/scsi_ses.c
SES_ENABLE_PASSTHROUGH opt_ses.h
# Options used in dev/sym/ (Symbios SCSI driver).
SYM_SETUP_SCSI_DIFF opt_sym.h #-HVD support for 825a, 875, 885
# disabled:0 (default), enabled:1
SYM_SETUP_PCI_PARITY opt_sym.h #-PCI parity checking
# disabled:0, enabled:1 (default)
SYM_SETUP_MAX_LUN opt_sym.h #-Number of LUNs supported
# default:8, range:[1..64]
# Options used only in dev/isp/*
ISP_TARGET_MODE opt_isp.h
ISP_FW_CRASH_DUMP opt_isp.h
ISP_DEFAULT_ROLES opt_isp.h
ISP_INTERNAL_TARGET opt_isp.h
ISP_FCTAPE_OFF opt_isp.h
# Options used only in dev/iscsi
ISCSI_INITIATOR_DEBUG opt_iscsi_initiator.h
# Net stuff.
ACCEPT_FILTER_DATA
ACCEPT_FILTER_DNS
ACCEPT_FILTER_HTTP
ALTQ opt_global.h
ALTQ_CBQ opt_altq.h
ALTQ_CDNR opt_altq.h
ALTQ_CODEL opt_altq.h
ALTQ_DEBUG opt_altq.h
ALTQ_HFSC opt_altq.h
ALTQ_FAIRQ opt_altq.h
ALTQ_NOPCC opt_altq.h
ALTQ_PRIQ opt_altq.h
ALTQ_RED opt_altq.h
ALTQ_RIO opt_altq.h
BOOTP opt_bootp.h
BOOTP_BLOCKSIZE opt_bootp.h
BOOTP_COMPAT opt_bootp.h
BOOTP_NFSROOT opt_bootp.h
BOOTP_NFSV3 opt_bootp.h
BOOTP_WIRED_TO opt_bootp.h
DEVICE_POLLING
DUMMYNET opt_ipdn.h
RATELIMIT opt_ratelimit.h
RATELIMIT_DEBUG opt_ratelimit.h
INET opt_inet.h
INET6 opt_inet6.h
STATS opt_global.h
IPDIVERT
IPFILTER opt_ipfilter.h
IPFILTER_DEFAULT_BLOCK opt_ipfilter.h
IPFILTER_LOG opt_ipfilter.h
IPFILTER_LOOKUP opt_ipfilter.h
IPFIREWALL opt_ipfw.h
IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h
IPFIREWALL_NAT opt_ipfw.h
IPFIREWALL_NAT64 opt_ipfw.h
IPFIREWALL_NPTV6 opt_ipfw.h
IPFIREWALL_VERBOSE opt_ipfw.h
IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h
IPFIREWALL_PMOD opt_ipfw.h
IPSEC opt_ipsec.h
IPSEC_DEBUG opt_ipsec.h
IPSEC_SUPPORT opt_ipsec.h
IPSTEALTH
KERN_TLS
KRPC
LIBALIAS
LIBMCHAIN
MBUF_PROFILING
MBUF_STRESS_TEST
MROUTING opt_mrouting.h
NFSLOCKD
NETLINK opt_global.h
PF_DEFAULT_TO_DROP opt_pf.h
ROUTE_MPATH opt_route.h
ROUTETABLES opt_route.h
FIB_ALGO opt_route.h
RSS opt_rss.h
SLIP_IFF_OPTS opt_slip.h
TCPPCAP opt_global.h
SIFTR
TCP_BLACKBOX opt_global.h
TCP_HHOOK opt_global.h
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
TCP_RFC7413 opt_inet.h
TCP_RFC7413_MAX_KEYS opt_inet.h
TCP_RFC7413_MAX_PSKS opt_inet.h
TCP_SIGNATURE opt_ipsec.h
VLAN_ARRAY opt_vlan.h
XDR
XBONEHACK
#
# SCTP
#
SCTP opt_sctp.h
SCTP_SUPPORT opt_sctp.h
SCTP_DEBUG opt_sctp.h # Enable debug printfs
SCTP_LOCK_LOGGING opt_sctp.h # Log to KTR lock activity
SCTP_MBUF_LOGGING opt_sctp.h # Log to KTR general mbuf aloc/free
SCTP_MBCNT_LOGGING opt_sctp.h # Log to KTR mbcnt activity
SCTP_PACKET_LOGGING opt_sctp.h # Log to a packet buffer last N packets
SCTP_LTRACE_CHUNKS opt_sctp.h # Log to KTR chunks processed
SCTP_LTRACE_ERRORS opt_sctp.h # Log to KTR error returns.
SCTP_USE_PERCPU_STAT opt_sctp.h # Use per cpu stats.
SCTP_MCORE_INPUT opt_sctp.h # Have multiple input threads for input mbufs
SCTP_LOCAL_TRACE_BUF opt_sctp.h # Use tracebuffer exported via sysctl
SCTP_DETAILED_STR_STATS opt_sctp.h # Use per PR-SCTP policy stream stats
#
#
#
# Netgraph(4). Use option NETGRAPH to enable the base netgraph code.
# Each netgraph node type can either be compiled into the kernel
# or loaded dynamically. To get the former, include the corresponding
# option below. Each type has its own man page, e.g. ng_async(4).
NETGRAPH
NETGRAPH_DEBUG opt_netgraph.h
NETGRAPH_ASYNC opt_netgraph.h
NETGRAPH_BLUETOOTH opt_netgraph.h
NETGRAPH_BLUETOOTH_BT3C opt_netgraph.h
NETGRAPH_BLUETOOTH_H4 opt_netgraph.h
NETGRAPH_BLUETOOTH_HCI opt_netgraph.h
NETGRAPH_BLUETOOTH_L2CAP opt_netgraph.h
NETGRAPH_BLUETOOTH_SOCKET opt_netgraph.h
NETGRAPH_BLUETOOTH_UBT opt_netgraph.h
NETGRAPH_BLUETOOTH_UBTBCMFW opt_netgraph.h
NETGRAPH_BPF opt_netgraph.h
NETGRAPH_BRIDGE opt_netgraph.h
NETGRAPH_CAR opt_netgraph.h
NETGRAPH_CHECKSUM opt_netgraph.h
NETGRAPH_CISCO opt_netgraph.h
NETGRAPH_DEFLATE opt_netgraph.h
NETGRAPH_DEVICE opt_netgraph.h
NETGRAPH_ECHO opt_netgraph.h
NETGRAPH_EIFACE opt_netgraph.h
NETGRAPH_ETHER opt_netgraph.h
NETGRAPH_ETHER_ECHO opt_netgraph.h
NETGRAPH_FEC opt_netgraph.h
NETGRAPH_FRAME_RELAY opt_netgraph.h
NETGRAPH_GIF opt_netgraph.h
NETGRAPH_GIF_DEMUX opt_netgraph.h
NETGRAPH_HOLE opt_netgraph.h
NETGRAPH_IFACE opt_netgraph.h
NETGRAPH_IP_INPUT opt_netgraph.h
NETGRAPH_IPFW opt_netgraph.h
NETGRAPH_KSOCKET opt_netgraph.h
NETGRAPH_L2TP opt_netgraph.h
NETGRAPH_LMI opt_netgraph.h
NETGRAPH_MPPC_COMPRESSION opt_netgraph.h
NETGRAPH_MPPC_ENCRYPTION opt_netgraph.h
NETGRAPH_NAT opt_netgraph.h
NETGRAPH_NETFLOW opt_netgraph.h
NETGRAPH_ONE2MANY opt_netgraph.h
NETGRAPH_PATCH opt_netgraph.h
NETGRAPH_PIPE opt_netgraph.h
NETGRAPH_PPP opt_netgraph.h
NETGRAPH_PPPOE opt_netgraph.h
NETGRAPH_PPTPGRE opt_netgraph.h
NETGRAPH_PRED1 opt_netgraph.h
NETGRAPH_RFC1490 opt_netgraph.h
NETGRAPH_SOCKET opt_netgraph.h
NETGRAPH_SPLIT opt_netgraph.h
NETGRAPH_SPPP opt_netgraph.h
NETGRAPH_TAG opt_netgraph.h
NETGRAPH_TCPMSS opt_netgraph.h
NETGRAPH_TEE opt_netgraph.h
NETGRAPH_TTY opt_netgraph.h
NETGRAPH_UI opt_netgraph.h
NETGRAPH_VJC opt_netgraph.h
NETGRAPH_VLAN opt_netgraph.h
# DRM options
DRM_DEBUG opt_drm.h
TI_SF_BUF_JUMBO opt_ti.h
TI_JUMBO_HDRSPLIT opt_ti.h
# Misc debug flags. Most of these should probably be replaced with
# 'DEBUG', and then let people recompile just the interesting modules
# with 'make CC="cc -DDEBUG"'.
DEBUG_1284 opt_ppb_1284.h
LPT_DEBUG opt_lpt.h
PLIP_DEBUG opt_plip.h
LOCKF_DEBUG opt_debug_lockf.h
SI_DEBUG opt_debug_si.h
IFMEDIA_DEBUG opt_ifmedia.h
# Fb options
FB_DEBUG opt_fb.h
# ppbus related options
PERIPH_1284 opt_ppb_1284.h
DONTPROBE_1284 opt_ppb_1284.h
# smbus related options
ENABLE_ALART opt_intpm.h
# These cause changes all over the kernel
BLKDEV_IOSIZE opt_global.h
BURN_BRIDGES opt_global.h
DEBUG opt_global.h
DEBUG_LOCKS opt_global.h
DEBUG_VFS_LOCKS opt_global.h
DFLTPHYS opt_global.h
DIAGNOSTIC opt_global.h
INVARIANT_SUPPORT opt_global.h
INVARIANTS opt_global.h
KASSERT_PANIC_OPTIONAL opt_global.h
MAXCPU opt_global.h
MAXMEMDOM opt_global.h
MAXPHYS opt_maxphys.h
MCLSHIFT opt_global.h
MUTEX_NOINLINE opt_global.h
LOCK_PROFILING opt_global.h
MSIZE opt_global.h
REGRESSION opt_global.h
RWLOCK_NOINLINE opt_global.h
SX_NOINLINE opt_global.h
VFS_BIO_DEBUG opt_global.h
# These are VM related options
VM_KMEM_SIZE opt_vm.h
VM_KMEM_SIZE_SCALE opt_vm.h
VM_KMEM_SIZE_MAX opt_vm.h
VM_NRESERVLEVEL opt_vm.h
VM_LEVEL_0_ORDER opt_vm.h
NO_SWAPPING opt_vm.h
MALLOC_MAKE_FAILURES opt_vm.h
MALLOC_PROFILE opt_vm.h
MALLOC_DEBUG_MAXZONES opt_vm.h
# The MemGuard replacement allocator used for tamper-after-free detection
DEBUG_MEMGUARD opt_vm.h
# The RedZone malloc(9) protection
DEBUG_REDZONE opt_vm.h
# Standard SMP options
EARLY_AP_STARTUP opt_global.h
SMP opt_global.h
NUMA opt_global.h
+CPUGRP_SCORE opt_global.h
# Size of the kernel message buffer
MSGBUF_SIZE opt_msgbuf.h
# NFS options
NFS_MINATTRTIMO opt_nfs.h
NFS_MAXATTRTIMO opt_nfs.h
NFS_MINDIRATTRTIMO opt_nfs.h
NFS_MAXDIRATTRTIMO opt_nfs.h
NFS_DEBUG opt_nfs.h
# TMPFS options
TMPFS_PAGES_MINRESERVED opt_tmpfs.h
# Options for uart(4)
UART_PPS_ON_CTS opt_uart.h
UART_POLL_FREQ opt_uart.h
UART_DEV_TOLERANCE_PCT opt_uart.h
# options for bus/device framework
BUS_DEBUG opt_bus.h
# options for USB support
USB_DEBUG opt_usb.h
USB_HOST_ALIGN opt_usb.h
USB_REQ_DEBUG opt_usb.h
USB_TEMPLATE opt_usb.h
USB_VERBOSE opt_usb.h
USB_DMA_SINGLE_ALLOC opt_usb.h
USB_EHCI_BIG_ENDIAN_DESC opt_usb.h
U3G_DEBUG opt_u3g.h
UKBD_DFLT_KEYMAP opt_ukbd.h
UPLCOM_INTR_INTERVAL opt_uplcom.h
UVSCOM_DEFAULT_OPKTSIZE opt_uvscom.h
UVSCOM_INTR_INTERVAL opt_uvscom.h
# options for the Realtek rtwn driver
RTWN_DEBUG opt_rtwn.h
RTWN_WITHOUT_UCODE opt_rtwn.h
# Embedded system options
INIT_PATH
ROOTDEVNAME
FDC_DEBUG opt_fdc.h
PCFCLOCK_VERBOSE opt_pcfclock.h
PCFCLOCK_MAX_RETRIES opt_pcfclock.h
KTR opt_global.h
KTR_ALQ opt_ktr.h
KTR_MASK opt_ktr.h
KTR_CPUMASK opt_ktr.h
KTR_COMPILE opt_global.h
KTR_BOOT_ENTRIES opt_global.h
KTR_ENTRIES opt_global.h
KTR_VERBOSE opt_ktr.h
WITNESS opt_global.h
WITNESS_KDB opt_witness.h
WITNESS_NO_VNODE opt_witness.h
WITNESS_SKIPSPIN opt_witness.h
WITNESS_COUNT opt_witness.h
OPENSOLARIS_WITNESS opt_global.h
EPOCH_TRACE opt_global.h
# options for ACPI support
ACPI_DEBUG opt_acpi.h
ACPI_MAX_TASKS opt_acpi.h
ACPI_MAX_THREADS opt_acpi.h
DEV_ACPI opt_acpi.h
ACPI_EARLY_EPYC_WAR opt_acpi.h
# options for IOMMU support
IOMMU opt_iommu.h
# ISA support
DEV_ISA opt_isa.h
ISAPNP opt_dontuse.h
# various 'device presence' options.
DEV_BPF opt_bpf.h
DEV_CARP opt_carp.h
DEV_NETMAP opt_global.h
DEV_PCI opt_pci.h
DEV_PF opt_pf.h
DEV_PFLOG opt_pf.h
DEV_PFSYNC opt_pf.h
DEV_SPLASH opt_splash.h
DEV_VLAN opt_vlan.h
# bce driver
BCE_DEBUG opt_bce.h
BCE_NVRAM_WRITE_SUPPORT opt_bce.h
SOCKBUF_DEBUG opt_global.h
# options for hifn driver
HIFN_DEBUG opt_hifn.h
HIFN_RNDTEST opt_hifn.h
# options for safenet driver
SAFE_DEBUG opt_safe.h
SAFE_NO_RNG opt_safe.h
SAFE_RNDTEST opt_safe.h
# syscons/vt options
MAXCONS opt_syscons.h
SC_ALT_MOUSE_IMAGE opt_syscons.h
SC_CUT_SPACES2TABS opt_syscons.h
SC_CUT_SEPCHARS opt_syscons.h
SC_DEBUG_LEVEL opt_syscons.h
SC_DFLT_FONT opt_syscons.h
SC_DFLT_TERM opt_syscons.h
SC_DISABLE_KDBKEY opt_syscons.h
SC_DISABLE_REBOOT opt_syscons.h
SC_HISTORY_SIZE opt_syscons.h
SC_KERNEL_CONS_ATTR opt_syscons.h
SC_KERNEL_CONS_ATTRS opt_syscons.h
SC_KERNEL_CONS_REV_ATTR opt_syscons.h
SC_MOUSE_CHAR opt_syscons.h
SC_NO_CUTPASTE opt_syscons.h
SC_NO_FONT_LOADING opt_syscons.h
SC_NO_HISTORY opt_syscons.h
SC_NO_MODE_CHANGE opt_syscons.h
SC_NO_SUSPEND_VTYSWITCH opt_syscons.h
SC_NO_SYSMOUSE opt_syscons.h
SC_NO_TERM_DUMB opt_syscons.h
SC_NO_TERM_SC opt_syscons.h
SC_NO_TERM_TEKEN opt_syscons.h
SC_NORM_ATTR opt_syscons.h
SC_NORM_REV_ATTR opt_syscons.h
SC_PIXEL_MODE opt_syscons.h
SC_RENDER_DEBUG opt_syscons.h
SC_TWOBUTTON_MOUSE opt_syscons.h
VT_ALT_TO_ESC_HACK opt_syscons.h
VT_FB_MAX_WIDTH opt_syscons.h
VT_FB_MAX_HEIGHT opt_syscons.h
VT_MAXWINDOWS opt_syscons.h
VT_TWOBUTTON_MOUSE opt_syscons.h
DEV_SC opt_syscons.h
DEV_VT opt_syscons.h
# teken terminal emulator options
TEKEN_CONS25 opt_teken.h
TEKEN_UTF8 opt_teken.h
TERMINAL_KERN_ATTR opt_teken.h
TERMINAL_NORM_ATTR opt_teken.h
# options for printf
PRINTF_BUFR_SIZE opt_printf.h
BOOT_TAG opt_printf.h
BOOT_TAG_SZ opt_printf.h
# kbd options
KBD_DISABLE_KEYMAP_LOAD opt_kbd.h
KBD_INSTALL_CDEV opt_kbd.h
KBD_MAXRETRY opt_kbd.h
KBD_MAXWAIT opt_kbd.h
KBD_RESETDELAY opt_kbd.h
KBD_DELAY1 opt_kbd.h
KBD_DELAY2 opt_kbd.h
KBDIO_DEBUG opt_kbd.h
KBDMUX_DFLT_KEYMAP opt_kbdmux.h
# options for the Atheros driver
ATH_DEBUG opt_ath.h
ATH_TXBUF opt_ath.h
ATH_RXBUF opt_ath.h
ATH_DIAGAPI opt_ath.h
ATH_TX99_DIAG opt_ath.h
ATH_ENABLE_DFS opt_ath.h
ATH_EEPROM_FIRMWARE opt_ath.h
ATH_ENABLE_RADIOTAP_VENDOR_EXT opt_ath.h
ATH_DEBUG_ALQ opt_ath.h
ATH_KTR_INTR_DEBUG opt_ath.h
AH_DEBUG opt_ah.h
AH_ASSERT opt_ah.h
AH_DEBUG_ALQ opt_ah.h
AH_REGOPS_FUNC opt_ah.h
AH_WRITE_REGDOMAIN opt_ah.h
AH_DEBUG_COUNTRY opt_ah.h
AH_WRITE_EEPROM opt_ah.h
AH_PRIVATE_DIAG opt_ah.h
AH_NEED_DESC_SWAP opt_ah.h
AH_USE_INIPDGAIN opt_ah.h
AH_MAXCHAN opt_ah.h
AH_RXCFG_SDMAMW_4BYTES opt_ah.h
AH_INTERRUPT_DEBUGGING opt_ah.h
# AR5416 and later interrupt mitigation
# XXX do not use this for AR9130
AH_AR5416_INTERRUPT_MITIGATION opt_ah.h
# options for the Altera mSGDMA driver (altera_msgdma)
ALTERA_MSGDMA_DESC_STD opt_altera_msgdma.h
ALTERA_MSGDMA_DESC_EXT opt_altera_msgdma.h
ALTERA_MSGDMA_DESC_PF_STD opt_altera_msgdma.h
ALTERA_MSGDMA_DESC_PF_EXT opt_altera_msgdma.h
# options for the Broadcom BCM43xx driver (bwi)
BWI_DEBUG opt_bwi.h
BWI_DEBUG_VERBOSE opt_bwi.h
# options for the Broadcom BCM43xx driver (bwn)
BWN_DEBUG opt_bwn.h
BWN_GPL_PHY opt_bwn.h
BWN_USE_SIBA opt_bwn.h
# Options for the SIBA driver
SIBA_DEBUG opt_siba.h
# options for the Marvell 8335 wireless driver
MALO_DEBUG opt_malo.h
MALO_TXBUF opt_malo.h
MALO_RXBUF opt_malo.h
# options for the Marvell wireless driver
MWL_DEBUG opt_mwl.h
MWL_TXBUF opt_mwl.h
MWL_RXBUF opt_mwl.h
MWL_DIAGAPI opt_mwl.h
MWL_AGGR_SIZE opt_mwl.h
MWL_TX_NODROP opt_mwl.h
# Options for the Marvell NETA driver
MVNETA_MULTIQUEUE opt_mvneta.h
MVNETA_KTR opt_mvneta.h
# Options for the Intel 802.11ac wireless driver
IWM_DEBUG opt_iwm.h
# Options for the Intel 802.11n wireless driver
IWN_DEBUG opt_iwn.h
# Options for the Intel 3945ABG wireless driver
WPI_DEBUG opt_wpi.h
# dcons options
DCONS_BUF_SIZE opt_dcons.h
DCONS_POLL_HZ opt_dcons.h
DCONS_FORCE_CONSOLE opt_dcons.h
DCONS_FORCE_GDB opt_dcons.h
# HWPMC options
HWPMC_DEBUG opt_global.h
HWPMC_HOOKS
# 802.11 support layer
IEEE80211_DEBUG opt_wlan.h
IEEE80211_DEBUG_REFCNT opt_wlan.h
IEEE80211_SUPPORT_MESH opt_wlan.h
IEEE80211_SUPPORT_SUPERG opt_wlan.h
IEEE80211_SUPPORT_TDMA opt_wlan.h
IEEE80211_ALQ opt_wlan.h
IEEE80211_DFS_DEBUG opt_wlan.h
# 802.11 TDMA support
TDMA_SLOTLEN_DEFAULT opt_tdma.h
TDMA_SLOTCNT_DEFAULT opt_tdma.h
TDMA_BINTVAL_DEFAULT opt_tdma.h
TDMA_TXRATE_11B_DEFAULT opt_tdma.h
TDMA_TXRATE_11G_DEFAULT opt_tdma.h
TDMA_TXRATE_11A_DEFAULT opt_tdma.h
TDMA_TXRATE_TURBO_DEFAULT opt_tdma.h
TDMA_TXRATE_HALF_DEFAULT opt_tdma.h
TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h
TDMA_TXRATE_11NA_DEFAULT opt_tdma.h
TDMA_TXRATE_11NG_DEFAULT opt_tdma.h
# VideoMode
PICKMODE_DEBUG opt_videomode.h
# Network stack virtualization options
VIMAGE opt_global.h
VNET_DEBUG opt_global.h
# Common Flash Interface (CFI) options
CFI_SUPPORT_STRATAFLASH opt_cfi.h
CFI_ARMEDANDDANGEROUS opt_cfi.h
CFI_HARDWAREBYTESWAP opt_cfi.h
# Sound options
SND_DEBUG opt_snd.h
SND_DIAGNOSTIC opt_snd.h
SND_FEEDER_MULTIFORMAT opt_snd.h
SND_FEEDER_FULL_MULTIFORMAT opt_snd.h
SND_FEEDER_RATE_HP opt_snd.h
SND_PCM_64 opt_snd.h
SND_OLDSTEREO opt_snd.h
X86BIOS
# Flattened device tree options
FDT opt_platform.h
FDT_DTB_STATIC opt_platform.h
# OFED Infiniband stack
OFED opt_ofed.h
OFED_DEBUG_INIT opt_ofed.h
SDP opt_ofed.h
SDP_DEBUG opt_ofed.h
IPOIB opt_ofed.h
IPOIB_DEBUG opt_ofed.h
IPOIB_CM opt_ofed.h
# Resource Accounting
RACCT opt_global.h
RACCT_DEFAULT_TO_DISABLED opt_global.h
# Resource Limits
RCTL opt_global.h
# Random number generator(s)
# Alternative RNG algorithm.
RANDOM_FENESTRASX opt_global.h
# With this, no entropy processor is loaded, but the entropy
# harvesting infrastructure is present. This means an entropy
# processor may be loaded as a module.
RANDOM_LOADABLE opt_global.h
# This turns on high-rate and potentially expensive harvesting in
# the uma slab allocator.
RANDOM_ENABLE_UMA opt_global.h
RANDOM_ENABLE_ETHER opt_global.h
# This option turns the TPM into an entropy source.
TPM_HARVEST opt_tpm.h
# BHND(4) driver
BHND_LOGLEVEL opt_global.h
# GPIO and child devices
GPIO_SPI_DEBUG opt_gpio.h
# SPI devices
SPIGEN_LEGACY_CDEVNAME opt_spi.h
# etherswitch(4) driver
RTL8366_SOFT_RESET opt_etherswitch.h
# evdev protocol support
EVDEV_SUPPORT opt_evdev.h
EVDEV_DEBUG opt_evdev.h
UINPUT_DEBUG opt_evdev.h
# Hyper-V network driver
HN_DEBUG opt_hn.h
# CAM-based MMC stack
MMCCAM
# Encrypted kernel crash dumps
EKCD opt_ekcd.h
# NVME options
NVME_USE_NVD opt_nvme.h
# amdsbwd options
AMDSBWD_DEBUG opt_amdsbwd.h
# gcov support
GCOV opt_global.h
LINDEBUGFS
# options for HID support
HID_DEBUG opt_hid.h
IICHID_DEBUG opt_hid.h
IICHID_SAMPLING opt_hid.h
HKBD_DFLT_KEYMAP opt_hkbd.h
HIDRAW_MAKE_UHID_ALIAS opt_hid.h
# kenv options
# The early kernel environment (loader environment, config(8)-provided static)
# is typically cleared after the dynamic environment comes up to ensure that
# we're not inadvertently holding on to 'secret' values in these stale envs.
# This option is insecure except in controlled environments where the static
# environment's contents are known to be safe.
PRESERVE_EARLY_KENV opt_global.h
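The only functional change in the hunk above is the new CPUGRP_SCORE entry. Because it is routed to opt_global.h, the macro becomes visible kernel-wide once a kernel configuration carries "options CPUGRP_SCORE". A minimal sketch (not part of the patch) of the resulting compile-time pattern, mirroring the #if defined(CPUGRP_SCORE) guards this patch adds to subr_smp.c below; the comments are illustrative only:

#include "opt_global.h"         /* defines CPUGRP_SCORE when the option is configured */

#if defined(CPUGRP_SCORE)
        /* score-aware topology code is compiled in */
#else
        /* without the option, references to the new cg_score field are expected to drop out */
#endif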
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 1f9577fddf9c..ec6b753cdf75 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -1,1352 +1,1358 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This module holds the global variables and machine independent functions
* used for the kernel SMP support.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include "opt_sched.h"
+#include "opt_global.h"
#ifdef SMP
MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data");
volatile cpuset_t stopped_cpus;
volatile cpuset_t started_cpus;
volatile cpuset_t suspended_cpus;
cpuset_t hlt_cpus_mask;
cpuset_t logical_cpus_mask;
void (*cpustop_restartfunc)(void);
#endif
static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);
/* This is used in modules that need to work in both SMP and UP. */
cpuset_t all_cpus;
int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;
volatile int smp_started;
u_int mp_maxid;
/* Array of CPU contexts saved during a panic. */
struct pcb *stoppcbs;
static SYSCTL_NODE(_kern, OID_AUTO, smp,
CTLFLAG_RD | CTLFLAG_CAPRD | CTLFLAG_MPSAFE, NULL,
"Kernel SMP");
SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
"Max CPU ID.");
SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
0, "Max number of CPUs that the system was compiled for.");
SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD|CTLTYPE_INT|CTLFLAG_MPSAFE,
NULL, 0, sysctl_kern_smp_active, "I",
"Indicates system is running in SMP mode");
int smp_disabled = 0; /* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&smp_disabled, 0, "SMP has been disabled from the loader");
int smp_cpus = 1; /* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
"Number of CPUs online");
int smp_threads_per_core = 1; /* how many SMT threads are running per core */
SYSCTL_INT(_kern_smp, OID_AUTO, threads_per_core, CTLFLAG_RD|CTLFLAG_CAPRD,
&smp_threads_per_core, 0, "Number of SMT threads online per core");
int mp_ncores = -1; /* how many physical cores running */
SYSCTL_INT(_kern_smp, OID_AUTO, cores, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncores, 0,
"Number of physical cores online");
int smp_topology = 0; /* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0,
"Topology override setting; 0 is default provided by hardware.");
#ifdef SMP
/* Variables needed for SMP rendezvous. */
static volatile int smp_rv_ncpus;
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;
static volatile int smp_rv_waiters[4];
/*
* Shared mutex to restrict busywaits between smp_rendezvous() and
* smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these
* functions trigger at once and cause multiple CPUs to busywait with
* interrupts disabled.
*/
struct mtx smp_ipi_mtx;
/*
* Let the MD SMP code initialize mp_maxid very early if it can.
*/
static void
mp_setmaxid(void *dummy)
{
cpu_mp_setmaxid();
KASSERT(mp_ncpus >= 1, ("%s: CPU count < 1", __func__));
KASSERT(mp_ncpus > 1 || mp_maxid == 0,
("%s: one CPU but mp_maxid is not zero", __func__));
KASSERT(mp_maxid >= mp_ncpus - 1,
("%s: counters out of sync: max %d, count %d", __func__,
mp_maxid, mp_ncpus));
cpusetsizemin = howmany(mp_maxid + 1, NBBY);
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
/*
* Call the MD SMP initialization code.
*/
static void
mp_start(void *dummy)
{
mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
/* Probe for MP hardware. */
if (smp_disabled != 0 || cpu_mp_probe() == 0) {
mp_ncores = 1;
mp_ncpus = 1;
CPU_SETOF(PCPU_GET(cpuid), &all_cpus);
return;
}
cpu_mp_start();
printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
mp_ncpus);
/* Provide a default for most architectures that don't have SMT/HTT. */
if (mp_ncores < 0)
mp_ncores = mp_ncpus;
stoppcbs = mallocarray(mp_maxid + 1, sizeof(struct pcb), M_DEVBUF,
M_WAITOK | M_ZERO);
cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
void
forward_signal(struct thread *td)
{
int id;
/*
* signotify() has already set TDA_AST and TDA_SIG on td_ast for
* this thread, so all we need to do is poke it if it is currently
* executing so that it executes ast().
*/
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(TD_IS_RUNNING(td),
("forward_signal: thread is not TDS_RUNNING"));
CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
if (!smp_started || cold || KERNEL_PANICKED())
return;
/* No need to IPI ourself. */
if (td == curthread)
return;
id = td->td_oncpu;
if (id == NOCPU)
return;
ipi_cpu(id, IPI_AST);
}
/*
* When called the executing CPU will send an IPI to all other CPUs
* requesting that they halt execution.
*
* Usually (but not necessarily) called with 'other_cpus' as its arg.
*
* - Signals all CPUs in map to stop.
* - Waits for each to stop.
*
* Returns:
* -1: error
* 0: NA
* 1: ok
*
*/
#if defined(__amd64__) || defined(__i386__)
#define X86 1
#else
#define X86 0
#endif
static int
generic_stop_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
static volatile u_int stopping_cpu = NOCPU;
int i;
volatile cpuset_t *cpus;
KASSERT(
type == IPI_STOP || type == IPI_STOP_HARD
#if X86
|| type == IPI_SUSPEND
#endif
, ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
cpusetobj_strprint(cpusetbuf, &map), type);
#if X86
/*
* When suspending, ensure there are no IPIs in progress.
* IPIs that have been issued, but not yet delivered (e.g.
* not pending on a vCPU when running under virtualization)
* will be lost, violating FreeBSD's assumption of reliable
* IPI delivery.
*/
if (type == IPI_SUSPEND)
mtx_lock_spin(&smp_ipi_mtx);
#endif
#if X86
if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
#endif
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
PCPU_GET(cpuid)) == 0)
while (stopping_cpu != NOCPU)
cpu_spinwait(); /* spin */
/* send the stop IPI to all CPUs in map */
ipi_selected(map, type);
#if X86
}
#endif
#if X86
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
#endif
cpus = &stopped_cpus;
i = 0;
while (!CPU_SUBSET(cpus, &map)) {
/* spin */
cpu_spinwait();
i++;
if (i == 100000000) {
printf("timeout stopping cpus\n");
break;
}
}
#if X86
if (type == IPI_SUSPEND)
mtx_unlock_spin(&smp_ipi_mtx);
#endif
stopping_cpu = NOCPU;
return (1);
}
int
stop_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP));
}
int
stop_cpus_hard(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP_HARD));
}
#if X86
int
suspend_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_SUSPEND));
}
#endif
/*
* Called by a CPU to restart stopped CPUs.
*
* Usually (but not necessarily) called with 'stopped_cpus' as its arg.
*
* - Signals all CPUs in map to restart.
* - Waits for each to restart.
*
* Returns:
* -1: error
* 0: NA
* 1: ok
*/
static int
generic_restart_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
volatile cpuset_t *cpus;
#if X86
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
|| type == IPI_SUSPEND, ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
if (type == IPI_SUSPEND)
cpus = &resuming_cpus;
else
cpus = &stopped_cpus;
/* signal other cpus to restart */
if (type == IPI_SUSPEND)
CPU_COPY_STORE_REL(&map, &toresume_cpus);
else
CPU_COPY_STORE_REL(&map, &started_cpus);
/*
* Wake up any CPUs stopped with MWAIT. From MI code we can't tell if
* MONITOR/MWAIT is enabled, but the potentially redundant writes are
* relatively inexpensive.
*/
if (type == IPI_STOP) {
struct monitorbuf *mb;
u_int id;
CPU_FOREACH(id) {
if (!CPU_ISSET(id, &map))
continue;
mb = &pcpu_find(id)->pc_monitorbuf;
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_RUNNING);
}
}
if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
}
#else /* !X86 */
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
cpus = &stopped_cpus;
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
#endif
return (1);
}
int
restart_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_STOP));
}
#if X86
int
resume_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_SUSPEND));
}
#endif
#undef X86
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function
* (if specified), rendezvous, execute the action function (if specified),
* rendezvous again, execute the teardown function (if specified), and then
* resume.
*
* Note that the supplied external functions _must_ be reentrant and aware
* that they are running in parallel and in an unknown lock context.
*/
void
smp_rendezvous_action(void)
{
struct thread *td;
void *local_func_arg;
void (*local_setup_func)(void*);
void (*local_action_func)(void*);
void (*local_teardown_func)(void*);
#ifdef INVARIANTS
int owepreempt;
#endif
/* Ensure we have up-to-date values. */
atomic_add_acq_int(&smp_rv_waiters[0], 1);
while (smp_rv_waiters[0] < smp_rv_ncpus)
cpu_spinwait();
/* Fetch rendezvous parameters after acquire barrier. */
local_func_arg = smp_rv_func_arg;
local_setup_func = smp_rv_setup_func;
local_action_func = smp_rv_action_func;
local_teardown_func = smp_rv_teardown_func;
/*
* Use a nested critical section to prevent any preemptions
* from occurring during a rendezvous action routine.
* Specifically, if a rendezvous handler is invoked via an IPI
* and the interrupted thread was in the critical_exit()
* function after setting td_critnest to 0 but before
* performing a deferred preemption, this routine can be
* invoked with td_critnest set to 0 and td_owepreempt true.
* In that case, a critical_exit() during the rendezvous
* action would trigger a preemption which is not permitted in
* a rendezvous action. To fix this, wrap all of the
* rendezvous action handlers in a critical section. We
* cannot use a regular critical section however as having
* critical_exit() preempt from this routine would also be
* problematic (the preemption must not occur before the IPI
* has been acknowledged via an EOI). Instead, we
* intentionally ignore td_owepreempt when leaving the
* critical section. This should be harmless because we do
* not permit rendezvous action routines to schedule threads,
* and thus td_owepreempt should never transition from 0 to 1
* during this routine.
*/
td = curthread;
td->td_critnest++;
#ifdef INVARIANTS
owepreempt = td->td_owepreempt;
#endif
/*
* If requested, run a setup function before the main action
* function. Ensure all CPUs have completed the setup
* function before moving on to the action function.
*/
if (local_setup_func != smp_no_rendezvous_barrier) {
if (local_setup_func != NULL)
local_setup_func(local_func_arg);
atomic_add_int(&smp_rv_waiters[1], 1);
while (smp_rv_waiters[1] < smp_rv_ncpus)
cpu_spinwait();
}
if (local_action_func != NULL)
local_action_func(local_func_arg);
if (local_teardown_func != smp_no_rendezvous_barrier) {
/*
* Signal that the main action has been completed. If a
* full exit rendezvous is requested, then all CPUs will
* wait here until all CPUs have finished the main action.
*/
atomic_add_int(&smp_rv_waiters[2], 1);
while (smp_rv_waiters[2] < smp_rv_ncpus)
cpu_spinwait();
if (local_teardown_func != NULL)
local_teardown_func(local_func_arg);
}
/*
* Signal that the rendezvous is fully completed by this CPU.
* This means that no member of smp_rv_* pseudo-structure will be
* accessed by this target CPU after this point; in particular,
* memory pointed by smp_rv_func_arg.
*
* The release semantic ensures that all accesses performed by
* the current CPU are visible when smp_rendezvous_cpus()
* returns, by synchronizing with the
* atomic_load_acq_int(&smp_rv_waiters[3]).
*/
atomic_add_rel_int(&smp_rv_waiters[3], 1);
td->td_critnest--;
KASSERT(owepreempt == td->td_owepreempt,
("rendezvous action changed td_owepreempt"));
}
void
smp_rendezvous_cpus(cpuset_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
int curcpumap, i, ncpus = 0;
/* See comments in the !SMP case. */
if (!smp_started) {
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
return;
}
/*
* Make sure we come here with interrupts enabled. Otherwise we
* livelock if smp_ipi_mtx is owned by a thread which sent us an IPI.
*/
MPASS(curthread->td_md.md_spinlock_count == 0);
CPU_FOREACH(i) {
if (CPU_ISSET(i, &map))
ncpus++;
}
if (ncpus == 0)
panic("ncpus is 0 with non-zero map");
mtx_lock_spin(&smp_ipi_mtx);
/* Pass rendezvous parameters via global variables. */
smp_rv_ncpus = ncpus;
smp_rv_setup_func = setup_func;
smp_rv_action_func = action_func;
smp_rv_teardown_func = teardown_func;
smp_rv_func_arg = arg;
smp_rv_waiters[1] = 0;
smp_rv_waiters[2] = 0;
smp_rv_waiters[3] = 0;
atomic_store_rel_int(&smp_rv_waiters[0], 0);
/*
* Signal other processors, which will enter the IPI with
* interrupts off.
*/
curcpumap = CPU_ISSET(curcpu, &map);
CPU_CLR(curcpu, &map);
ipi_selected(map, IPI_RENDEZVOUS);
/* Check if the current CPU is in the map */
if (curcpumap != 0)
smp_rendezvous_action();
/*
* Ensure that the master CPU waits for all the other
* CPUs to finish the rendezvous, so that smp_rv_*
* pseudo-structure and the arg are guaranteed to not
* be in use.
*
* Load acquire synchronizes with the release add in
* smp_rendezvous_action(), which ensures that our caller sees
* all memory actions done by the called functions on other
* CPUs.
*/
while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
cpu_spinwait();
mtx_unlock_spin(&smp_ipi_mtx);
}
void
smp_rendezvous(void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
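As context (not part of the patch), a minimal sketch of how a caller typically uses smp_rendezvous(); it mirrors cpus_fence_seq_cst() further down in this file and passes smp_no_rendezvous_barrier to skip the setup and teardown phases. example_action is a placeholder name.

static void
example_action(void *arg __unused)
{
        /* Runs on every CPU in parallel; must be reentrant and must not
         * assume any particular lock context (see the comment above
         * smp_rendezvous_action()). */
        atomic_thread_fence_seq_cst();
}

        /* ... from some machine-independent code path ... */
        smp_rendezvous(smp_no_rendezvous_barrier, example_action,
            smp_no_rendezvous_barrier, NULL);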
static void
smp_topo_fill(struct cpu_group *cg)
{
int c;
for (c = 0; c < cg->cg_children; c++)
smp_topo_fill(&cg->cg_child[c]);
cg->cg_first = CPU_FFS(&cg->cg_mask) - 1;
cg->cg_last = CPU_FLS(&cg->cg_mask) - 1;
}
struct cpu_group *
smp_topo(void)
{
char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
static struct cpu_group *top = NULL;
/*
* The first call to smp_topo() is guaranteed to occur
* during the kernel boot while we are still single-threaded.
*/
if (top != NULL)
return (top);
/*
* Check for a fake topology request for debugging purposes.
*/
switch (smp_topology) {
case 1:
/* Dual core with no sharing. */
top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
break;
case 2:
/* No topology, all cpus are equal. */
top = smp_topo_none();
break;
case 3:
/* Dual core with shared L2. */
top = smp_topo_1level(CG_SHARE_L2, 2, 0);
break;
case 4:
/* quad core, shared l3 among each package, private l2. */
top = smp_topo_1level(CG_SHARE_L3, 4, 0);
break;
case 5:
/* quad core, 2 dualcore parts on each package share l2. */
top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
break;
case 6:
/* Single-core 2xHTT */
top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
break;
case 7:
/* quad core with a shared l3, 8 threads sharing L2. */
top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
CG_FLAG_SMT);
break;
default:
/* Default, ask the system what it wants. */
top = cpu_topo();
break;
}
/*
* Verify the returned topology.
*/
if (top->cg_count != mp_ncpus)
panic("Built bad topology at %p. CPU count %d != %d",
top, top->cg_count, mp_ncpus);
if (CPU_CMP(&top->cg_mask, &all_cpus))
panic("Built bad topology at %p. CPU mask (%s) != (%s)",
top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
cpusetobj_strprint(cpusetbuf2, &all_cpus));
/*
* Collapse nonsense levels that may be created out of convenience by
* the MD layers. They cause extra work in the search functions.
*/
while (top->cg_children == 1) {
top = &top->cg_child[0];
top->cg_parent = NULL;
}
smp_topo_fill(top);
return (top);
}
struct cpu_group *
smp_topo_alloc(u_int count)
{
static struct cpu_group *group = NULL;
static u_int index;
u_int curr;
if (group == NULL) {
group = mallocarray((mp_maxid + 1) * MAX_CACHE_LEVELS + 1,
sizeof(*group), M_DEVBUF, M_WAITOK | M_ZERO);
}
curr = index;
index += count;
return (&group[curr]);
}
struct cpu_group *
smp_topo_none(void)
{
struct cpu_group *top;
top = smp_topo_alloc(1);
top->cg_parent = NULL;
top->cg_child = NULL;
top->cg_mask = all_cpus;
top->cg_count = mp_ncpus;
top->cg_children = 0;
top->cg_level = CG_SHARE_NONE;
top->cg_flags = 0;
-
+#if defined(CPUGRP_SCORE)
+ memset(top->cg_score, CG_SCORE_DEFAULT, sizeof(top->cg_score));
+#endif
return (top);
}
static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
int count, int flags, int start)
{
char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
cpuset_t mask;
int i;
CPU_ZERO(&mask);
for (i = 0; i < count; i++, start++)
CPU_SET(start, &mask);
child->cg_parent = parent;
child->cg_child = NULL;
child->cg_children = 0;
child->cg_level = share;
child->cg_count = count;
child->cg_flags = flags;
child->cg_mask = mask;
+#if defined(CPUGRP_SCORE)
+ memset(child->cg_score, CG_SCORE_DEFAULT, sizeof(child->cg_score));
+#endif
parent->cg_children++;
for (; parent != NULL; parent = parent->cg_parent) {
if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
panic("Duplicate children in %p. mask (%s) child (%s)",
parent,
cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
CPU_OR(&parent->cg_mask, &parent->cg_mask, &child->cg_mask);
parent->cg_count += child->cg_count;
}
return (start);
}
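Both smp_topo_none() above and smp_topo_addleaf() here now seed the new cg_score array with CG_SCORE_DEFAULT, so every group starts from a known neutral score as soon as the topology is built. A hedged sketch of the same idea applied to a whole subtree; smp_topo_score_reset() is a hypothetical helper (not in the patch) that follows the same recursion shape as smp_topo_fill() above:

#if defined(CPUGRP_SCORE)
static void
smp_topo_score_reset(struct cpu_group *cg)
{
        int c;

        /* Reset this group, then recurse into its children. */
        memset(cg->cg_score, CG_SCORE_DEFAULT, sizeof(cg->cg_score));
        for (c = 0; c < cg->cg_children; c++)
                smp_topo_score_reset(&cg->cg_child[c]);
}
#endif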
struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
struct cpu_group *child;
struct cpu_group *top;
int packages;
int cpu;
int i;
cpu = 0;
packages = mp_ncpus / count;
top = smp_topo_alloc(1 + packages);
top->cg_child = child = top + 1;
top->cg_level = CG_SHARE_NONE;
for (i = 0; i < packages; i++, child++)
cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
return (top);
}
struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
int l1flags)
{
struct cpu_group *top;
struct cpu_group *l1g;
struct cpu_group *l2g;
int cpu;
int i;
int j;
cpu = 0;
top = smp_topo_alloc(1 + mp_ncpus / (l2count * l1count) +
mp_ncpus / l1count);
l2g = top + 1;
top->cg_child = l2g;
top->cg_level = CG_SHARE_NONE;
top->cg_children = mp_ncpus / (l2count * l1count);
l1g = l2g + top->cg_children;
for (i = 0; i < top->cg_children; i++, l2g++) {
l2g->cg_parent = top;
l2g->cg_child = l1g;
l2g->cg_level = l2share;
for (j = 0; j < l2count; j++, l1g++)
cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
l1flags, cpu);
}
return (top);
}
struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
struct cpu_group *cg;
cpuset_t mask;
int children;
int i;
CPU_SETOF(cpu, &mask);
cg = top;
for (;;) {
if (!CPU_OVERLAP(&cg->cg_mask, &mask))
return (NULL);
if (cg->cg_children == 0)
return (cg);
children = cg->cg_children;
for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
if (CPU_OVERLAP(&cg->cg_mask, &mask))
break;
}
return (NULL);
}
#else /* !SMP */
void
smp_rendezvous_cpus(cpuset_t map,
void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
/*
* In the !SMP case we just need to ensure the same initial conditions
* as the SMP case.
*/
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
}
void
smp_rendezvous(void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func,
arg);
}
/*
* Provide dummy SMP support for UP kernels. Modules that need to use SMP
* APIs will still work using this dummy support.
*/
static void
mp_setvariables_for_up(void *dummy)
{
mp_ncpus = 1;
mp_ncores = 1;
mp_maxid = PCPU_GET(cpuid);
CPU_SETOF(mp_maxid, &all_cpus);
KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
mp_setvariables_for_up, NULL);
#endif /* SMP */
void
smp_no_rendezvous_barrier(void *dummy)
{
#ifdef SMP
KASSERT((!smp_started),("smp_no_rendezvous called and smp is started"));
#endif
}
void
smp_rendezvous_cpus_retry(cpuset_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void (* wait_func)(void *, int),
struct smp_rendezvous_cpus_retry_arg *arg)
{
int cpu;
CPU_COPY(&map, &arg->cpus);
/*
* Only one CPU to execute on.
*/
if (!smp_started) {
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
return;
}
/*
* Execute an action on all specified CPUs while retrying until they
* all acknowledge completion.
*/
for (;;) {
smp_rendezvous_cpus(
arg->cpus,
setup_func,
action_func,
teardown_func,
arg);
if (CPU_EMPTY(&arg->cpus))
break;
CPU_FOREACH(cpu) {
if (!CPU_ISSET(cpu, &arg->cpus))
continue;
wait_func(arg, cpu);
}
}
}
void
smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *arg)
{
CPU_CLR_ATOMIC(curcpu, &arg->cpus);
}
/*
* If (prio & PDROP) == 0:
* Wait for specified idle threads to switch once. This ensures that even
* preempted threads have cycled through the switch function once,
* exiting their codepaths. This allows us to change global pointers
* with no other synchronization.
* If (prio & PDROP) != 0:
* Force the specified CPUs to switch context at least once.
*/
int
quiesce_cpus(cpuset_t map, const char *wmesg, int prio)
{
struct pcpu *pcpu;
u_int *gen;
int error;
int cpu;
error = 0;
if ((prio & PDROP) == 0) {
gen = mallocarray(sizeof(u_int), mp_maxid + 1, M_TEMP,
M_WAITOK);
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
continue;
pcpu = pcpu_find(cpu);
gen[cpu] = pcpu->pc_idlethread->td_generation;
}
}
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
continue;
pcpu = pcpu_find(cpu);
thread_lock(curthread);
sched_bind(curthread, cpu);
thread_unlock(curthread);
if ((prio & PDROP) != 0)
continue;
while (gen[cpu] == pcpu->pc_idlethread->td_generation) {
error = tsleep(quiesce_cpus, prio & ~PDROP, wmesg, 1);
if (error != EWOULDBLOCK)
goto out;
error = 0;
}
}
out:
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
if ((prio & PDROP) == 0)
free(gen, M_TEMP);
return (error);
}
int
quiesce_all_cpus(const char *wmesg, int prio)
{
return quiesce_cpus(all_cpus, wmesg, prio);
}
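The comment above quiesce_cpus() describes the non-PDROP mode: wait until every targeted CPU's idle thread has switched once, after which even preempted threads have left their old code paths. As context (not part of the patch), a minimal sketch of the classic use, replacing a global pointer; every name here is a placeholder:

static char *global_cfg;                /* placeholder global, illustrative only */

static void
replace_cfg(char *new_cfg)
{
        char *old;

        old = global_cfg;
        global_cfg = new_cfg;           /* publish the new pointer */
        quiesce_all_cpus("cfgrepl", 0); /* wait for every CPU to switch once */
        free(old, M_TEMP);              /* no CPU can still be using it */
}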
/*
* Observe all CPUs not executing in critical section.
* We are not in one so the check for us is safe. If the found
* thread changes to something else we know the section was
* exited as well.
*/
void
quiesce_all_critical(void)
{
struct thread *td, *newtd;
struct pcpu *pcpu;
int cpu;
MPASS(curthread->td_critnest == 0);
CPU_FOREACH(cpu) {
pcpu = cpuid_to_pcpu[cpu];
td = pcpu->pc_curthread;
for (;;) {
if (td->td_critnest == 0)
break;
cpu_spinwait();
newtd = (struct thread *)
atomic_load_acq_ptr((void *)pcpu->pc_curthread);
if (td != newtd)
break;
}
}
}
static void
cpus_fence_seq_cst_issue(void *arg __unused)
{
atomic_thread_fence_seq_cst();
}
/*
* Send an IPI forcing a sequentially consistent fence.
*
* Allows replacement of an explicit fence with a compiler barrier.
* Trades speed up during normal execution for a significant slowdown when
* the barrier is needed.
*/
void
cpus_fence_seq_cst(void)
{
#ifdef SMP
smp_rendezvous(
smp_no_rendezvous_barrier,
cpus_fence_seq_cst_issue,
smp_no_rendezvous_barrier,
NULL
);
#else
cpus_fence_seq_cst_issue(NULL);
#endif
}
/* Extra care is taken with this sysctl because the data type is volatile */
static int
sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
{
int error, active;
active = smp_started;
error = SYSCTL_OUT(req, &active, sizeof(active));
return (error);
}
#ifdef SMP
void
topo_init_node(struct topo_node *node)
{
bzero(node, sizeof(*node));
TAILQ_INIT(&node->children);
}
void
topo_init_root(struct topo_node *root)
{
topo_init_node(root);
root->type = TOPO_TYPE_SYSTEM;
}
/*
* Add a child node with the given ID under the given parent.
* Do nothing if there is already a child with that ID.
*/
struct topo_node *
topo_add_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype)
{
struct topo_node *node;
TAILQ_FOREACH_REVERSE(node, &parent->children,
topo_children, siblings) {
if (node->hwid == hwid
&& node->type == type && node->subtype == subtype) {
return (node);
}
}
node = malloc(sizeof(*node), M_TOPO, M_WAITOK);
topo_init_node(node);
node->parent = parent;
node->hwid = hwid;
node->type = type;
node->subtype = subtype;
TAILQ_INSERT_TAIL(&parent->children, node, siblings);
parent->nchildren++;
return (node);
}
/*
* Find a child node with the given ID under the given parent.
*/
struct topo_node *
topo_find_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype)
{
struct topo_node *node;
TAILQ_FOREACH(node, &parent->children, siblings) {
if (node->hwid == hwid
&& node->type == type && node->subtype == subtype) {
return (node);
}
}
return (NULL);
}
/*
* Given a node, change the order of its parent's child nodes such
* that the node becomes the first child while preserving the cyclic
* order of the children. In other words, the given node is promoted
* by rotation.
*/
void
topo_promote_child(struct topo_node *child)
{
struct topo_node *next;
struct topo_node *node;
struct topo_node *parent;
parent = child->parent;
next = TAILQ_NEXT(child, siblings);
TAILQ_REMOVE(&parent->children, child, siblings);
TAILQ_INSERT_HEAD(&parent->children, child, siblings);
while (next != NULL) {
node = next;
next = TAILQ_NEXT(node, siblings);
TAILQ_REMOVE(&parent->children, node, siblings);
TAILQ_INSERT_AFTER(&parent->children, child, node, siblings);
child = node;
}
}
/*
* Iterate to the next node in the depth-first search (traversal) of
* the topology tree.
*/
struct topo_node *
topo_next_node(struct topo_node *top, struct topo_node *node)
{
struct topo_node *next;
if ((next = TAILQ_FIRST(&node->children)) != NULL)
return (next);
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
while (node != top && (node = node->parent) != top)
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
return (NULL);
}
/*
* Iterate to the next node in the depth-first search of the topology tree,
* but without descending below the current node.
*/
struct topo_node *
topo_next_nonchild_node(struct topo_node *top, struct topo_node *node)
{
struct topo_node *next;
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
while (node != top && (node = node->parent) != top)
if ((next = TAILQ_NEXT(node, siblings)) != NULL)
return (next);
return (NULL);
}
/*
* Assign the given ID to the given topology node that represents a logical
* processor.
*/
void
topo_set_pu_id(struct topo_node *node, cpuid_t id)
{
KASSERT(node->type == TOPO_TYPE_PU,
("topo_set_pu_id: wrong node type: %u", node->type));
KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0,
("topo_set_pu_id: cpuset already not empty"));
node->id = id;
CPU_SET(id, &node->cpuset);
node->cpu_count = 1;
node->subtype = 1;
while ((node = node->parent) != NULL) {
KASSERT(!CPU_ISSET(id, &node->cpuset),
("logical ID %u is already set in node %p", id, node));
CPU_SET(id, &node->cpuset);
node->cpu_count++;
}
}
static struct topology_spec {
topo_node_type type;
bool match_subtype;
uintptr_t subtype;
} topology_level_table[TOPO_LEVEL_COUNT] = {
[TOPO_LEVEL_PKG] = { .type = TOPO_TYPE_PKG, },
[TOPO_LEVEL_GROUP] = { .type = TOPO_TYPE_GROUP, },
[TOPO_LEVEL_CACHEGROUP] = {
.type = TOPO_TYPE_CACHE,
.match_subtype = true,
.subtype = CG_SHARE_L3,
},
[TOPO_LEVEL_CORE] = { .type = TOPO_TYPE_CORE, },
[TOPO_LEVEL_THREAD] = { .type = TOPO_TYPE_PU, },
};
static bool
topo_analyze_table(struct topo_node *root, int all, enum topo_level level,
struct topo_analysis *results)
{
struct topology_spec *spec;
struct topo_node *node;
int count;
if (level >= TOPO_LEVEL_COUNT)
return (true);
spec = &topology_level_table[level];
count = 0;
node = topo_next_node(root, root);
while (node != NULL) {
if (node->type != spec->type ||
(spec->match_subtype && node->subtype != spec->subtype)) {
node = topo_next_node(root, node);
continue;
}
if (!all && CPU_EMPTY(&node->cpuset)) {
node = topo_next_nonchild_node(root, node);
continue;
}
count++;
if (!topo_analyze_table(node, all, level + 1, results))
return (false);
node = topo_next_nonchild_node(root, node);
}
/* No explicit subgroups is essentially one subgroup. */
if (count == 0) {
count = 1;
if (!topo_analyze_table(root, all, level + 1, results))
return (false);
}
if (results->entities[level] == -1)
results->entities[level] = count;
else if (results->entities[level] != count)
return (false);
return (true);
}
/*
* Check if the topology is uniform, that is, each package has the same number
* of cores in it and each core has the same number of threads (logical
* processors) in it. If so, calculate the number of packages, the number of
* groups per package, the number of cachegroups per group, and the number of
* logical processors per cachegroup. The 'all' parameter tells whether to include
* administratively disabled logical processors into the analysis.
*/
int
topo_analyze(struct topo_node *topo_root, int all,
struct topo_analysis *results)
{
results->entities[TOPO_LEVEL_PKG] = -1;
results->entities[TOPO_LEVEL_CORE] = -1;
results->entities[TOPO_LEVEL_THREAD] = -1;
results->entities[TOPO_LEVEL_GROUP] = -1;
results->entities[TOPO_LEVEL_CACHEGROUP] = -1;
if (!topo_analyze_table(topo_root, all, TOPO_LEVEL_PKG, results))
return (0);
KASSERT(results->entities[TOPO_LEVEL_PKG] > 0,
("bug in topology or analysis"));
return (1);
}
#endif /* SMP */
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 252dc9dc1cae..735cad0439a3 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -1,298 +1,311 @@
/*-
* SPDX-License-Identifier: Beerware
*
* ----------------------------------------------------------------------------
* "THE BEER-WARE LICENSE" (Revision 42):
* <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
* can do whatever you want with this stuff. If we meet some day, and you think
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
*/
#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_
#ifdef _KERNEL
#ifndef LOCORE
#include <sys/cpuset.h>
#include <sys/queue.h>
+#include "opt_global.h"
+
/*
* Types of nodes in the topological tree.
*/
typedef enum {
/* No node has this type; can be used in topo API calls. */
TOPO_TYPE_DUMMY,
/* Processing unit aka computing unit aka logical CPU. */
TOPO_TYPE_PU,
/* Physical subdivision of a package. */
TOPO_TYPE_CORE,
/* CPU L1/L2/L3 cache. */
TOPO_TYPE_CACHE,
/* Package aka chip, equivalent to socket. */
TOPO_TYPE_PKG,
/* NUMA node. */
TOPO_TYPE_NODE,
/* Other logical or physical grouping of PUs. */
/* E.g. PUs on the same die, or PUs sharing an FPU. */
TOPO_TYPE_GROUP,
/* The whole system. */
TOPO_TYPE_SYSTEM
} topo_node_type;
/* Hardware identifier of a topology component. */
typedef unsigned int hwid_t;
/* Logical CPU identifier. */
typedef int cpuid_t;
/* A node in the topology. */
struct topo_node {
struct topo_node *parent;
TAILQ_HEAD(topo_children, topo_node) children;
TAILQ_ENTRY(topo_node) siblings;
cpuset_t cpuset;
topo_node_type type;
uintptr_t subtype;
hwid_t hwid;
cpuid_t id;
int nchildren;
int cpu_count;
};
/*
* Scheduling topology of a NUMA or SMP system.
*
* The top level topology is an array of pointers to groups. Each group
* contains a bitmask of cpus in its group or subgroups. It may also
* contain a pointer to an array of child groups.
*
* The bitmasks at non leaf groups may be used by consumers who support
* a smaller depth than the hardware provides.
*
* The topology may be omitted by systems where all CPUs are equal.
*/
+#if defined(CPUGRP_SCORE)
+#define CG_SCORE_CLASS_MAX 8
+#define CG_SCORE_CAPABILITY_MAX 2
+
+#define CG_SCORE_DEFAULT 0x80
+#endif
+
struct cpu_group {
struct cpu_group *cg_parent; /* Our parent group. */
struct cpu_group *cg_child; /* Optional children groups. */
cpuset_t cg_mask; /* Mask of cpus in this group. */
int32_t cg_count; /* Count of cpus in this group. */
int32_t cg_first; /* First cpu in this group. */
int32_t cg_last; /* Last cpu in this group. */
int16_t cg_children; /* Number of children groups. */
int8_t cg_level; /* Shared cache level. */
int8_t cg_flags; /* Traversal modifiers. */
+#if defined(CPUGRP_SCORE)
+ uint8_t cg_score[CG_SCORE_CLASS_MAX][CG_SCORE_CAPABILITY_MAX];
+ /* Performance/Efficiency Score from Intel HFI/ITD */
+#endif
};
typedef struct cpu_group *cpu_group_t;
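/*
 * Illustrative sketch (editor's addition, not part of the patch): a
 * bounds-checked accessor for the cg_score field added above. The helper
 * name is hypothetical, and the interpretation of the capability index
 * (e.g. 0 = performance, 1 = efficiency) is an assumption; the patch itself
 * only defines the array bounds and the default value.
 */
#if defined(CPUGRP_SCORE)
static __inline uint8_t
example_cg_score(const struct cpu_group *cg, int class_id, int capability)
{
	if (class_id < 0 || class_id >= CG_SCORE_CLASS_MAX ||
	    capability < 0 || capability >= CG_SCORE_CAPABILITY_MAX)
		return (CG_SCORE_DEFAULT);
	return (cg->cg_score[class_id][capability]);
}
#endif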
/*
* Defines common resources for CPUs in the group. The highest level
* resource should be used when multiple are shared.
*/
#define CG_SHARE_NONE 0
#define CG_SHARE_L1 1
#define CG_SHARE_L2 2
#define CG_SHARE_L3 3
#define MAX_CACHE_LEVELS CG_SHARE_L3
/*
* Behavior modifiers for load balancing and affinity.
*/
#define CG_FLAG_HTT 0x01 /* Schedule the alternate core last. */
#define CG_FLAG_SMT 0x02 /* New age htt, less crippled. */
#define CG_FLAG_THREAD (CG_FLAG_HTT | CG_FLAG_SMT) /* Any threading. */
#define CG_FLAG_NODE 0x04 /* NUMA node. */
/*
* Convenience routines for building and traversing topologies.
*/
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);
enum topo_level {
TOPO_LEVEL_PKG = 0,
/*
* Some systems have useful sub-package core organizations. On these,
* a package has one or more subgroups. Each subgroup contains one or
* more cache groups (cores that share a last level cache).
*/
TOPO_LEVEL_GROUP,
TOPO_LEVEL_CACHEGROUP,
TOPO_LEVEL_CORE,
TOPO_LEVEL_THREAD,
TOPO_LEVEL_COUNT /* Must be last */
};
struct topo_analysis {
int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
struct topo_analysis *results);
#define TOPO_FOREACH(i, root) \
for (i = root; i != NULL; i = topo_next_node(root, i))
struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus; /* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus; /* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus; /* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus; /* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask; /* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */
extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;
extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM]; /* CPUs in each NUMA domain. */
struct pcb;
extern struct pcb *stoppcbs;
/*
* Macro allowing us to determine whether a CPU is absent at any given
* time, thus permitting us to configure sparse maps of cpuid-dependent
* (per-CPU) structures.
*/
#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus))
/*
* Macros to iterate over non-absent CPUs. CPU_FOREACH() takes an
* integer iterator and iterates over the available set of CPUs.
* CPU_FIRST() returns the id of the first non-absent CPU. CPU_NEXT()
* returns the id of the next non-absent CPU. It will wrap back to
* CPU_FIRST() once the end of the list is reached. The iterators are
* currently implemented via inline functions.
*/
#define CPU_FOREACH(i) \
for ((i) = 0; (i) <= mp_maxid; (i)++) \
if (!CPU_ABSENT((i)))
static __inline int
cpu_first(void)
{
int i;
for (i = 0;; i++)
if (!CPU_ABSENT(i))
return (i);
}
static __inline int
cpu_next(int i)
{
for (;;) {
i++;
if ((u_int)i > mp_maxid)
i = 0;
if (!CPU_ABSENT(i))
return (i);
}
}
#define CPU_FIRST() cpu_first()
#define CPU_NEXT(i) cpu_next((i))
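/*
 * Illustrative sketch (editor's addition, not part of the patch): a typical
 * use of the iterator above, counting the non-absent CPUs. The helper name
 * is hypothetical.
 */
static __inline int
example_cpu_count(void)
{
	int cpu, count;

	count = 0;
	CPU_FOREACH(cpu)
		count++;
	return (count);
}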
#ifdef SMP
/*
* Machine dependent functions used to initialize MP support.
*
* The cpu_mp_probe() should check to see if MP support is present and return
* zero if it is not or non-zero if it is. If MP support is present, then
* cpu_mp_start() will be called so that MP can be enabled. This function
* should do things such as startup secondary processors. It should also
* setup mp_ncpus, all_cpus, and smp_cpus. It should also ensure that
* smp_started is initialized at the appropriate time.
* Once cpu_mp_start() returns, machine independent MP startup code will be
* executed and a simple message will be output to the console. Finally,
* cpu_mp_announce() will be called so that machine dependent messages about
* the MP support may be output to the console if desired.
*
* The cpu_setmaxid() function is called very early during the boot process
* so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
* that other subsystems may use. If a platform is not able to determine
* the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
*/
struct thread;
struct cpu_group *cpu_topo(void);
void cpu_mp_announce(void);
int cpu_mp_probe(void);
void cpu_mp_setmaxid(void);
void cpu_mp_start(void);
void forward_signal(struct thread *);
int restart_cpus(cpuset_t);
int stop_cpus(cpuset_t);
int stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int suspend_cpus(cpuset_t);
int resume_cpus(cpuset_t);
#endif
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;
#endif /* SMP */
int quiesce_all_cpus(const char *, int);
int quiesce_cpus(cpuset_t, const char *, int);
void quiesce_all_critical(void);
void cpus_fence_seq_cst(void);
void smp_no_rendezvous_barrier(void *);
void smp_rendezvous(void (*)(void *),
void (*)(void *),
void (*)(void *),
void *arg);
void smp_rendezvous_cpus(cpuset_t,
void (*)(void *),
void (*)(void *),
void (*)(void *),
void *arg);
struct smp_rendezvous_cpus_retry_arg {
cpuset_t cpus;
};
void smp_rendezvous_cpus_retry(cpuset_t,
void (*)(void *),
void (*)(void *),
void (*)(void *),
void (*)(void *, int),
struct smp_rendezvous_cpus_retry_arg *);
void smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);
#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index 1027c2c8972b..5d9a57c8febe 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1,1749 +1,1757 @@
/*-
* Copyright (c) 1996, by Steve Passe
* Copyright (c) 2003, by Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. The name of the developer may NOT be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include "opt_acpi.h"
#ifdef __i386__
#include "opt_apic.h"
#endif
#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_gdb.h"
#include "opt_kstack_pages.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"
#include "opt_stack.h"
+#include "opt_global.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/bus.h>
#include <sys/cons.h> /* cngetc() */
#include <sys/cpuset.h>
#include <sys/csan.h>
#include <sys/interrupt.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/stack.h>
#include <x86/ucode.h>
#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
#endif
static MALLOC_DEFINE(M_CPUS, "cpus", "CPU items");
int mp_naps; /* # of Application processors */
int boot_cpu_id = -1; /* designated BSP */
/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
int bootAP;
/* Free these after use */
void *bootstacks[MAXCPU];
void *dpcpu;
struct susppcb **susppcbs;
#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
#endif
/* Default cpu_ops implementation. */
struct cpu_ops cpu_ops;
/*
* Local data and functions.
*/
static volatile cpuset_t ipi_stop_nmi_pending;
volatile cpuset_t resuming_cpus;
volatile cpuset_t toresume_cpus;
/* used to hold the AP's until we are ready to release them */
struct mtx ap_boot_mtx;
/* Set to 1 once we're ready to let the APs out of the pen. */
volatile int aps_ready = 0;
/*
* Store data from cpu_add() until later in the boot when we actually setup
* the APs.
*/
struct cpu_info *cpu_info;
int *apic_cpuids;
int cpu_apic_ids[MAXCPU];
_Static_assert(MAXCPU <= MAX_APIC_ID,
"MAXCPU cannot be larger that MAX_APIC_ID");
_Static_assert(xAPIC_MAX_APIC_ID <= MAX_APIC_ID,
"xAPIC_MAX_APIC_ID cannot be larger that MAX_APIC_ID");
static void release_aps(void *dummy);
static void cpustop_handler_post(u_int cpu);
static int hyperthreading_allowed = 1;
SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN,
&hyperthreading_allowed, 0, "Use Intel HTT logical CPUs");
static int hyperthreading_intr_allowed = 0;
SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_intr_allowed, CTLFLAG_RDTUN,
&hyperthreading_intr_allowed, 0,
"Allow interrupts on HTT logical CPUs");
static int intr_apic_id_limit = -1;
SYSCTL_INT(_machdep, OID_AUTO, intr_apic_id_limit, CTLFLAG_RDTUN,
&intr_apic_id_limit, 0,
"Maximum permitted APIC ID for interrupt delivery (-1 is unlimited)");
static struct topo_node topo_root;
static int pkg_id_shift;
static int node_id_shift;
static int core_id_shift;
static int disabled_cpus;
struct cache_info {
int id_shift;
int present;
} static caches[MAX_CACHE_LEVELS];
static bool stop_mwait = false;
SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
"Use MONITOR/MWAIT when stopping CPU, if available");
void
mem_range_AP_init(void)
{
if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
mem_range_softc.mr_op->initAP(&mem_range_softc);
}
/*
* Round up to the next power of two, if necessary, and then
* take log2.
* Returns -1 if argument is zero.
*/
static __inline int
mask_width(u_int x)
{
return (fls(x << (1 - powerof2(x))) - 1);
}
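/*
 * Editor's note (illustrative, not part of the patch): e.g.
 * mask_width(1) == 0, mask_width(2) == 1, mask_width(6) == 3
 * (6 rounds up to 8), and mask_width(0) == -1.
 */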
/*
* Add a cache level to the cache topology description.
*/
static int
add_deterministic_cache(int type, int level, int share_count)
{
if (type == 0)
return (0);
if (type > 3) {
printf("unexpected cache type %d\n", type);
return (1);
}
if (type == 2) /* ignore instruction cache */
return (1);
if (level == 0 || level > MAX_CACHE_LEVELS) {
printf("unexpected cache level %d\n", level);
return (1);
}
if (caches[level - 1].present) {
printf("WARNING: multiple entries for L%u data cache\n", level);
printf("%u => %u\n", caches[level - 1].id_shift,
mask_width(share_count));
}
caches[level - 1].id_shift = mask_width(share_count);
caches[level - 1].present = 1;
if (caches[level - 1].id_shift > pkg_id_shift) {
printf("WARNING: L%u data cache covers more "
"APIC IDs than a package (%u > %u)\n", level,
caches[level - 1].id_shift, pkg_id_shift);
caches[level - 1].id_shift = pkg_id_shift;
}
if (caches[level - 1].id_shift < core_id_shift) {
printf("WARNING: L%u data cache covers fewer "
"APIC IDs than a core (%u < %u)\n", level,
caches[level - 1].id_shift, core_id_shift);
caches[level - 1].id_shift = core_id_shift;
}
return (1);
}
/*
* Determine topology of processing units and caches for AMD CPUs.
* See:
* - AMD CPUID Specification (Publication # 25481)
* - BKDG for AMD NPT Family 0Fh Processors (Publication # 32559)
* - BKDG For AMD Family 10h Processors (Publication # 31116)
* - BKDG For AMD Family 15h Models 00h-0Fh Processors (Publication # 42301)
* - BKDG For AMD Family 16h Models 00h-0Fh Processors (Publication # 48751)
* - PPR For AMD Family 17h Models 00h-0Fh Processors (Publication # 54945)
*/
static void
topo_probe_amd(void)
{
u_int p[4];
uint64_t v;
int level;
int nodes_per_socket;
int share_count;
int type;
int i;
/* No multi-core capability. */
if ((amd_feature2 & AMDID2_CMP) == 0)
return;
/*
* XXX Lack of an AMD IOMMU driver prevents use of APIC IDs above
* xAPIC_MAX_APIC_ID. This is a workaround so we boot and function on
* AMD systems with high thread counts, albeit with reduced interrupt
* performance.
*
* We should really set the limit to xAPIC_MAX_APIC_ID by default, and
* have the IOMMU driver increase it. That way if a driver is present
* but disabled, or is otherwise not able to route the interrupts, the
* system can fall back to a functional state. That will require a more
* substantial change though, including having the IOMMU initialize
* earlier.
*/
if (intr_apic_id_limit == -1)
intr_apic_id_limit = xAPIC_MAX_APIC_ID;
/* For families 10h and newer. */
pkg_id_shift = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
AMDID_COREID_SIZE_SHIFT;
/* For 0Fh family. */
if (pkg_id_shift == 0)
pkg_id_shift =
mask_width((cpu_procinfo2 & AMDID_CMP_CORES) + 1);
/*
* Families prior to 16h define the following value as
* cores per compute unit and we don't really care about the AMD
* compute units at the moment. Perhaps we should treat them as
* cores and cores within the compute units as hardware threads,
* but that's up for debate.
* Later families define the value as threads per compute unit,
* so we are following AMD's nomenclature here.
*/
if ((amd_feature2 & AMDID2_TOPOLOGY) != 0 &&
CPUID_TO_FAMILY(cpu_id) >= 0x16) {
cpuid_count(0x8000001e, 0, p);
share_count = ((p[1] >> 8) & 0xff) + 1;
core_id_shift = mask_width(share_count);
/*
* For Zen (17h), gather Nodes per Processor. Each node is a
* Zeppelin die; TR and EPYC CPUs will have multiple dies per
* package. Communication latency between dies is higher than
* within them.
*/
nodes_per_socket = ((p[2] >> 8) & 0x7) + 1;
node_id_shift = pkg_id_shift - mask_width(nodes_per_socket);
}
if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) {
for (i = 0; ; i++) {
cpuid_count(0x8000001d, i, p);
type = p[0] & 0x1f;
level = (p[0] >> 5) & 0x7;
share_count = 1 + ((p[0] >> 14) & 0xfff);
if (!add_deterministic_cache(type, level, share_count))
break;
}
} else {
if (cpu_exthigh >= 0x80000005) {
cpuid_count(0x80000005, 0, p);
if (((p[2] >> 24) & 0xff) != 0) {
caches[0].id_shift = 0;
caches[0].present = 1;
}
}
if (cpu_exthigh >= 0x80000006) {
cpuid_count(0x80000006, 0, p);
if (((p[2] >> 16) & 0xffff) != 0) {
caches[1].id_shift = 0;
caches[1].present = 1;
}
if (((p[3] >> 18) & 0x3fff) != 0) {
nodes_per_socket = 1;
if ((amd_feature2 & AMDID2_NODE_ID) != 0) {
/*
* Handle multi-node processors that
* have multiple chips, each with its
* own L3 cache, on the same die.
*/
v = rdmsr(0xc001100c);
nodes_per_socket = 1 + ((v >> 3) & 0x7);
}
caches[2].id_shift =
pkg_id_shift - mask_width(nodes_per_socket);
caches[2].present = 1;
}
}
}
}
/*
* Determine topology of processing units for Intel CPUs
* using CPUID Leaf 1 and Leaf 4, if supported.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
* - Intel 64 and IA-32 Architectures Software Developer’s Manual,
* Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS
* FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS
*/
static void
topo_probe_intel_0x4(void)
{
u_int p[4];
int max_cores;
int max_logical;
/* Both zero and one here mean one logical processor per package. */
max_logical = (cpu_feature & CPUID_HTT) != 0 ?
(cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
if (max_logical <= 1)
return;
if (cpu_high >= 0x4) {
cpuid_count(0x04, 0, p);
max_cores = ((p[0] >> 26) & 0x3f) + 1;
} else
max_cores = 1;
core_id_shift = mask_width(max_logical/max_cores);
KASSERT(core_id_shift >= 0,
("intel topo: max_cores > max_logical\n"));
pkg_id_shift = core_id_shift + mask_width(max_cores);
}
/*
* Determine topology of processing units for Intel CPUs
* using CPUID Leaf 1Fh or 0Bh, if supported.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
* - Intel 64 and IA-32 Architectures Software Developer’s Manual,
* Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS
* FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS
*/
static void
topo_probe_intel_0xb(void)
{
u_int leaf;
u_int p[4] = { 0 };
int bits;
int type;
int i;
/* Prefer leaf 1Fh (V2 Extended Topology Enumeration). */
if (cpu_high >= 0x1f) {
leaf = 0x1f;
cpuid_count(leaf, 0, p);
}
/* Fall back to leaf 0Bh (Extended Topology Enumeration). */
if (p[1] == 0) {
leaf = 0x0b;
cpuid_count(leaf, 0, p);
}
/* Fall back to leaf 04h (Deterministic Cache Parameters). */
if (p[1] == 0) {
topo_probe_intel_0x4();
return;
}
/* We only support three levels for now. */
for (i = 0; ; i++) {
cpuid_count(leaf, i, p);
bits = p[0] & 0x1f;
type = (p[2] >> 8) & 0xff;
if (type == 0)
break;
if (type == CPUID_TYPE_SMT)
core_id_shift = bits;
else if (type == CPUID_TYPE_CORE)
pkg_id_shift = bits;
else if (bootverbose)
printf("Topology level type %d shift: %d\n", type, bits);
}
if (pkg_id_shift < core_id_shift) {
printf("WARNING: core covers more APIC IDs than a package\n");
core_id_shift = pkg_id_shift;
}
}
/*
* Determine topology of caches for Intel CPUs.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
* - Intel 64 and IA-32 Architectures Software Developer’s Manual
* Volume 2A: Instruction Set Reference, A-M,
* CPUID instruction
*/
static void
topo_probe_intel_caches(void)
{
u_int p[4];
int level;
int share_count;
int type;
int i;
if (cpu_high < 0x4) {
/*
* Available cache level and sizes can be determined
* via CPUID leaf 2, but that requires a huge table of hardcoded
* values, so for now just assume L1 and L2 caches potentially
* shared only by HTT processing units, if HTT is present.
*/
caches[0].id_shift = pkg_id_shift;
caches[0].present = 1;
caches[1].id_shift = pkg_id_shift;
caches[1].present = 1;
return;
}
for (i = 0; ; i++) {
cpuid_count(0x4, i, p);
type = p[0] & 0x1f;
level = (p[0] >> 5) & 0x7;
share_count = 1 + ((p[0] >> 14) & 0xfff);
if (!add_deterministic_cache(type, level, share_count))
break;
}
}
/*
* Determine topology of processing units and caches for Intel CPUs.
* See:
* - Intel 64 Architecture Processor Topology Enumeration
*/
static void
topo_probe_intel(void)
{
/*
* Note that 0x1 <= cpu_high < 4 case should be
* compatible with topo_probe_intel_0x4() logic when
* CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
* or it should trigger the fallback otherwise.
*/
if (cpu_high >= 0xb)
topo_probe_intel_0xb();
else if (cpu_high >= 0x1)
topo_probe_intel_0x4();
topo_probe_intel_caches();
}
/*
* Topology information is queried only on BSP, on which this
* code runs and for which it can query CPUID information.
* Then topology is extrapolated on all packages using an
* assumption that APIC ID to hardware component ID mapping is
* homogeneous.
* That doesn't necessarily imply that the topology is uniform.
*/
void
topo_probe(void)
{
static int cpu_topo_probed = 0;
struct x86_topo_layer {
int type;
int subtype;
int id_shift;
} topo_layers[MAX_CACHE_LEVELS + 5];
struct topo_node *parent;
struct topo_node *node;
int layer;
int nlayers;
int node_id;
int i;
#if defined(DEV_ACPI) && MAXMEMDOM > 1
int d, domain;
#endif
if (cpu_topo_probed)
return;
CPU_ZERO(&logical_cpus_mask);
if (mp_ncpus <= 1)
; /* nothing */
else if (cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON)
topo_probe_amd();
else if (cpu_vendor_id == CPU_VENDOR_INTEL)
topo_probe_intel();
KASSERT(pkg_id_shift >= core_id_shift,
("bug in APIC topology discovery"));
nlayers = 0;
bzero(topo_layers, sizeof(topo_layers));
topo_layers[nlayers].type = TOPO_TYPE_PKG;
topo_layers[nlayers].id_shift = pkg_id_shift;
if (bootverbose)
printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift);
nlayers++;
if (pkg_id_shift > node_id_shift && node_id_shift != 0) {
topo_layers[nlayers].type = TOPO_TYPE_GROUP;
topo_layers[nlayers].id_shift = node_id_shift;
if (bootverbose)
printf("Node ID shift: %u\n",
topo_layers[nlayers].id_shift);
nlayers++;
}
/*
* Consider all caches to be within a package/chip
* and "in front" of all sub-components like
* cores and hardware threads.
*/
for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) {
if (caches[i].present) {
if (node_id_shift != 0)
KASSERT(caches[i].id_shift <= node_id_shift,
("bug in APIC topology discovery"));
KASSERT(caches[i].id_shift <= pkg_id_shift,
("bug in APIC topology discovery"));
KASSERT(caches[i].id_shift >= core_id_shift,
("bug in APIC topology discovery"));
topo_layers[nlayers].type = TOPO_TYPE_CACHE;
topo_layers[nlayers].subtype = i + 1;
topo_layers[nlayers].id_shift = caches[i].id_shift;
if (bootverbose)
printf("L%u cache ID shift: %u\n",
topo_layers[nlayers].subtype,
topo_layers[nlayers].id_shift);
nlayers++;
}
}
if (pkg_id_shift > core_id_shift) {
topo_layers[nlayers].type = TOPO_TYPE_CORE;
topo_layers[nlayers].id_shift = core_id_shift;
if (bootverbose)
printf("Core ID shift: %u\n",
topo_layers[nlayers].id_shift);
nlayers++;
}
topo_layers[nlayers].type = TOPO_TYPE_PU;
topo_layers[nlayers].id_shift = 0;
nlayers++;
#if defined(DEV_ACPI) && MAXMEMDOM > 1
if (vm_ndomains > 1) {
for (layer = 0; layer < nlayers; ++layer) {
for (i = 0; i <= max_apic_id; ++i) {
if ((i & ((1 << topo_layers[layer].id_shift) - 1)) == 0)
domain = -1;
if (!cpu_info[i].cpu_present)
continue;
d = acpi_pxm_get_cpu_locality(i);
if (domain >= 0 && domain != d)
break;
domain = d;
}
if (i > max_apic_id)
break;
}
KASSERT(layer < nlayers, ("NUMA domain smaller than PU"));
memmove(&topo_layers[layer+1], &topo_layers[layer],
sizeof(*topo_layers) * (nlayers - layer));
topo_layers[layer].type = TOPO_TYPE_NODE;
topo_layers[layer].subtype = CG_SHARE_NONE;
nlayers++;
}
#endif
topo_init_root(&topo_root);
for (i = 0; i <= max_apic_id; ++i) {
if (!cpu_info[i].cpu_present)
continue;
parent = &topo_root;
for (layer = 0; layer < nlayers; ++layer) {
#if defined(DEV_ACPI) && MAXMEMDOM > 1
if (topo_layers[layer].type == TOPO_TYPE_NODE) {
node_id = acpi_pxm_get_cpu_locality(i);
} else
#endif
node_id = i >> topo_layers[layer].id_shift;
parent = topo_add_node_by_hwid(parent, node_id,
topo_layers[layer].type,
topo_layers[layer].subtype);
}
}
parent = &topo_root;
for (layer = 0; layer < nlayers; ++layer) {
#if defined(DEV_ACPI) && MAXMEMDOM > 1
if (topo_layers[layer].type == TOPO_TYPE_NODE)
node_id = acpi_pxm_get_cpu_locality(boot_cpu_id);
else
#endif
node_id = boot_cpu_id >> topo_layers[layer].id_shift;
node = topo_find_node_by_hwid(parent, node_id,
topo_layers[layer].type,
topo_layers[layer].subtype);
topo_promote_child(node);
parent = node;
}
cpu_topo_probed = 1;
}
/*
* Assign logical CPU IDs to local APICs.
*/
void
assign_cpu_ids(void)
{
struct topo_node *node;
u_int smt_mask;
int nhyper;
smt_mask = (1u << core_id_shift) - 1;
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
*/
mp_ncpus = 0;
nhyper = 0;
TOPO_FOREACH(node, &topo_root) {
if (node->type != TOPO_TYPE_PU)
continue;
if ((node->hwid & smt_mask) != (boot_cpu_id & smt_mask))
cpu_info[node->hwid].cpu_hyperthread = 1;
if (resource_disabled("lapic", node->hwid)) {
if (node->hwid != boot_cpu_id)
cpu_info[node->hwid].cpu_disabled = 1;
else
printf("Cannot disable BSP, APIC ID = %d\n",
node->hwid);
}
if (!hyperthreading_allowed &&
cpu_info[node->hwid].cpu_hyperthread)
cpu_info[node->hwid].cpu_disabled = 1;
if (mp_ncpus >= MAXCPU)
cpu_info[node->hwid].cpu_disabled = 1;
if (cpu_info[node->hwid].cpu_disabled) {
disabled_cpus++;
continue;
}
if (cpu_info[node->hwid].cpu_hyperthread)
nhyper++;
cpu_apic_ids[mp_ncpus] = node->hwid;
apic_cpuids[node->hwid] = mp_ncpus;
topo_set_pu_id(node, mp_ncpus);
mp_ncpus++;
}
KASSERT(mp_maxid >= mp_ncpus - 1,
("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
mp_ncpus));
mp_ncores = mp_ncpus - nhyper;
smp_threads_per_core = mp_ncpus / mp_ncores;
}
/*
* Print various information about the SMP system hardware and setup.
*/
void
cpu_mp_announce(void)
{
struct topo_node *node;
const char *hyperthread;
struct topo_analysis topology;
printf("FreeBSD/SMP: ");
if (topo_analyze(&topo_root, 1, &topology)) {
printf("%d package(s)", topology.entities[TOPO_LEVEL_PKG]);
if (topology.entities[TOPO_LEVEL_GROUP] > 1)
printf(" x %d groups",
topology.entities[TOPO_LEVEL_GROUP]);
if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1)
printf(" x %d cache groups",
topology.entities[TOPO_LEVEL_CACHEGROUP]);
if (topology.entities[TOPO_LEVEL_CORE] > 0)
printf(" x %d core(s)",
topology.entities[TOPO_LEVEL_CORE]);
if (topology.entities[TOPO_LEVEL_THREAD] > 1)
printf(" x %d hardware threads",
topology.entities[TOPO_LEVEL_THREAD]);
} else {
printf("Non-uniform topology");
}
printf("\n");
if (disabled_cpus) {
printf("FreeBSD/SMP Online: ");
if (topo_analyze(&topo_root, 0, &topology)) {
printf("%d package(s)",
topology.entities[TOPO_LEVEL_PKG]);
if (topology.entities[TOPO_LEVEL_GROUP] > 1)
printf(" x %d groups",
topology.entities[TOPO_LEVEL_GROUP]);
if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1)
printf(" x %d cache groups",
topology.entities[TOPO_LEVEL_CACHEGROUP]);
if (topology.entities[TOPO_LEVEL_CORE] > 0)
printf(" x %d core(s)",
topology.entities[TOPO_LEVEL_CORE]);
if (topology.entities[TOPO_LEVEL_THREAD] > 1)
printf(" x %d hardware threads",
topology.entities[TOPO_LEVEL_THREAD]);
} else {
printf("Non-uniform topology");
}
printf("\n");
}
if (!bootverbose)
return;
TOPO_FOREACH(node, &topo_root) {
switch (node->type) {
case TOPO_TYPE_PKG:
printf("Package HW ID = %u\n", node->hwid);
break;
case TOPO_TYPE_CORE:
printf("\tCore HW ID = %u\n", node->hwid);
break;
case TOPO_TYPE_PU:
if (cpu_info[node->hwid].cpu_hyperthread)
hyperthread = "/HT";
else
hyperthread = "";
if (node->subtype == 0)
printf("\t\tCPU (AP%s): APIC ID: %u"
"(disabled)\n", hyperthread, node->hwid);
else if (node->id == 0)
printf("\t\tCPU0 (BSP): APIC ID: %u\n",
node->hwid);
else
printf("\t\tCPU%u (AP%s): APIC ID: %u\n",
node->id, hyperthread, node->hwid);
break;
default:
/* ignored */
break;
}
}
}
/*
* Add a scheduling group, a group of logical processors sharing
* a particular cache (and thus having an affinity), to the scheduling
* topology.
* This function recursively works on lower level caches.
*/
static void
x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root)
{
struct topo_node *node;
int nchildren;
int ncores;
int i;
KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE ||
root->type == TOPO_TYPE_NODE || root->type == TOPO_TYPE_GROUP,
("x86topo_add_sched_group: bad type: %u", root->type));
CPU_COPY(&root->cpuset, &cg_root->cg_mask);
cg_root->cg_count = root->cpu_count;
if (root->type == TOPO_TYPE_CACHE)
cg_root->cg_level = root->subtype;
else
cg_root->cg_level = CG_SHARE_NONE;
if (root->type == TOPO_TYPE_NODE)
cg_root->cg_flags = CG_FLAG_NODE;
else
cg_root->cg_flags = 0;
+#if defined(CPUGRP_SCORE)
+ /*
+ * Set default performance/efficiency score.
+ */
+ memset(cg_root->cg_score, CG_SCORE_DEFAULT, sizeof(cg_root->cg_score));
+#endif
+
/*
* Check how many core nodes we have under the given root node.
* If we have multiple logical processors, but not multiple
* cores, then those processors must be hardware threads.
*/
ncores = 0;
node = root;
while (node != NULL) {
if (node->type != TOPO_TYPE_CORE) {
node = topo_next_node(root, node);
continue;
}
ncores++;
node = topo_next_nonchild_node(root, node);
}
if (cg_root->cg_level != CG_SHARE_NONE &&
root->cpu_count > 1 && ncores < 2)
cg_root->cg_flags |= CG_FLAG_SMT;
/*
* Find out how many cache nodes we have under the given root node.
* We ignore cache nodes that cover all the same processors as the
* root node. Also, we do not descend below found cache nodes.
* That is, we count top-level "non-redundant" caches under the root
* node.
*/
nchildren = 0;
node = root;
while (node != NULL) {
/*
* When some APICs are disabled by tunables, nodes can end up
* with an empty cpuset. Nodes with an empty cpuset will be
* translated into cpu groups with empty cpusets. smp_topo_fill
* will then set cg_first and cg_last to -1. This isn't
* correctly handled in all functions. E.g. when
* cpu_search_lowest and cpu_search_highest loop through all
* cpus, they call CPU_ISSET on cpu -1 which ends up in a
* general protection fault.
*
* We could fix the scheduler to handle empty cpu groups
* correctly. Nevertheless, empty cpu groups are causing
* overhead for no value. So, it makes more sense to simply not
* create them.
*/
if (CPU_EMPTY(&node->cpuset)) {
node = topo_next_node(root, node);
continue;
}
if (CPU_CMP(&node->cpuset, &root->cpuset) == 0) {
if (node->type == TOPO_TYPE_CACHE &&
cg_root->cg_level < node->subtype)
cg_root->cg_level = node->subtype;
if (node->type == TOPO_TYPE_NODE)
cg_root->cg_flags |= CG_FLAG_NODE;
node = topo_next_node(root, node);
continue;
}
if (node->type != TOPO_TYPE_GROUP &&
node->type != TOPO_TYPE_NODE &&
node->type != TOPO_TYPE_CACHE) {
node = topo_next_node(root, node);
continue;
}
nchildren++;
node = topo_next_nonchild_node(root, node);
}
/*
* We are not interested in nodes including only one CPU each.
*/
if (nchildren == root->cpu_count)
return;
/*
* We are not interested in nodes without children.
*/
cg_root->cg_children = nchildren;
if (nchildren == 0)
return;
cg_root->cg_child = smp_topo_alloc(nchildren);
/*
* Now find again the same cache nodes as above and recursively
* build scheduling topologies for them.
*/
node = root;
i = 0;
while (node != NULL) {
if ((node->type != TOPO_TYPE_GROUP &&
node->type != TOPO_TYPE_NODE &&
node->type != TOPO_TYPE_CACHE) ||
CPU_CMP(&node->cpuset, &root->cpuset) == 0 ||
CPU_EMPTY(&node->cpuset)) {
node = topo_next_node(root, node);
continue;
}
cg_root->cg_child[i].cg_parent = cg_root;
x86topo_add_sched_group(node, &cg_root->cg_child[i]);
i++;
node = topo_next_nonchild_node(root, node);
}
}
/*
* Build the MI scheduling topology from the discovered hardware topology.
*/
struct cpu_group *
cpu_topo(void)
{
struct cpu_group *cg_root;
if (mp_ncpus <= 1)
return (smp_topo_none());
cg_root = smp_topo_alloc(1);
x86topo_add_sched_group(&topo_root, cg_root);
return (cg_root);
}
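/*
 * Illustrative sketch (editor's addition, not part of the patch): with
 * CPUGRP_SCORE enabled, a consumer could look up a scheduling group
 * containing a given CPU via smp_topo_find() and read the score that
 * x86topo_add_sched_group() initialized above. The helper name is
 * hypothetical and the class/capability indices are assumed to be within
 * the bounds defined in sys/smp.h.
 */
#if defined(CPUGRP_SCORE)
static uint8_t
example_cpu_group_score(int cpu, int class_id, int capability)
{
	struct cpu_group *cg;

	cg = smp_topo_find(smp_topo(), cpu);
	if (cg == NULL)
		return (CG_SCORE_DEFAULT);
	return (cg->cg_score[class_id][capability]);
}
#endif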
static void
cpu_alloc(void *dummy __unused)
{
/*
* Dynamically allocate the arrays that depend on the
* maximum APIC ID.
*/
cpu_info = malloc(sizeof(*cpu_info) * (max_apic_id + 1), M_CPUS,
M_WAITOK | M_ZERO);
apic_cpuids = malloc(sizeof(*apic_cpuids) * (max_apic_id + 1), M_CPUS,
M_WAITOK | M_ZERO);
}
SYSINIT(cpu_alloc, SI_SUB_CPU, SI_ORDER_FIRST, cpu_alloc, NULL);
/*
* Add a logical CPU to the topology.
*/
void
cpu_add(u_int apic_id, char boot_cpu)
{
if (apic_id > max_apic_id)
panic("SMP: APIC ID %d too high", apic_id);
KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %u added twice",
apic_id));
cpu_info[apic_id].cpu_present = 1;
if (boot_cpu) {
KASSERT(boot_cpu_id == -1,
("CPU %u claims to be BSP, but CPU %u already is", apic_id,
boot_cpu_id));
boot_cpu_id = apic_id;
cpu_info[apic_id].cpu_bsp = 1;
}
if (bootverbose)
printf("SMP: Added CPU %u (%s)\n", apic_id, boot_cpu ? "BSP" :
"AP");
}
void
cpu_mp_setmaxid(void)
{
/*
* mp_ncpus and mp_maxid should be already set by calls to cpu_add().
* If there were no calls to cpu_add() assume this is a UP system.
*/
if (mp_ncpus == 0)
mp_ncpus = 1;
}
int
cpu_mp_probe(void)
{
/*
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
CPU_SETOF(0, &all_cpus);
return (mp_ncpus > 1);
}
/*
* AP CPUs call this to initialize themselves.
*/
void
init_secondary_tail(void)
{
u_int cpuid;
pmap_activate_boot(vmspace_pmap(proc0.p_vmspace));
/*
* On real hardware, switch to x2apic mode if possible. Do it
* after aps_ready was signalled, to avoid manipulating the
* mode while BSP might still want to send some IPI to us
* (second startup IPI is ignored on modern hardware etc).
*/
lapic_xapic_mode();
/* Initialize the PAT MSR. */
pmap_init_pat();
/* set up CPU registers and state */
cpu_setregs();
/* set up SSE/NX */
initializecpu();
/* set up FPU state on the AP */
#ifdef __amd64__
fpuinit();
#else
npxinit(false);
#endif
if (cpu_ops.cpu_init)
cpu_ops.cpu_init();
/* A quick check from sanity claus */
cpuid = PCPU_GET(cpuid);
if (PCPU_GET(apic_id) != lapic_id()) {
printf("SMP: cpuid = %d\n", cpuid);
printf("SMP: actual apic_id = %d\n", lapic_id());
printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
panic("cpuid mismatch! boom!!");
}
/* Initialize curthread. */
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
PCPU_SET(curthread, PCPU_GET(idlethread));
schedinit_ap();
mtx_lock_spin(&ap_boot_mtx);
mca_init();
/* Init local apic for irq's */
lapic_setup(1);
/* Set memory range attributes for this CPU to match the BSP */
mem_range_AP_init();
smp_cpus++;
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
if (bootverbose)
printf("SMP: AP CPU #%d Launched!\n", cpuid);
else
printf("%s%d%s", smp_cpus == 2 ? "Launching APs: " : "",
cpuid, smp_cpus == mp_ncpus ? "\n" : " ");
/* Determine if we are a logical CPU. */
if (cpu_info[PCPU_GET(apic_id)].cpu_hyperthread)
CPU_SET(cpuid, &logical_cpus_mask);
if (bootverbose)
lapic_dump("AP");
if (smp_cpus == mp_ncpus) {
/* enable IPI's, tlb shootdown, freezes etc */
atomic_store_rel_int(&smp_started, 1);
}
#ifdef __amd64__
if (pmap_pcid_enabled)
load_cr4(rcr4() | CR4_PCIDE);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_ufssel);
#endif
mtx_unlock_spin(&ap_boot_mtx);
/* Wait until all the APs are up. */
while (atomic_load_acq_int(&smp_started) == 0)
ia32_pause();
kcsan_cpu_init(cpuid);
sched_ap_entry();
panic("scheduler returned us to %s", __func__);
/* NOTREACHED */
}
static void
smp_after_idle_runnable(void *arg __unused)
{
int cpu;
if (mp_ncpus == 1)
return;
KASSERT(smp_started != 0, ("%s: SMP not started yet", __func__));
/*
* Wait for all APs to handle an interrupt. After that, we know that
* the APs have entered the scheduler at least once, so the boot stacks
* are safe to free.
*/
smp_rendezvous(smp_no_rendezvous_barrier, NULL,
smp_no_rendezvous_barrier, NULL);
for (cpu = 1; cpu < mp_ncpus; cpu++) {
kmem_free(bootstacks[cpu], kstack_pages * PAGE_SIZE);
}
}
SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
smp_after_idle_runnable, NULL);
/*
* We tell the I/O APIC code about all the CPUs we want to receive
* interrupts. If we don't want certain CPUs to receive IRQs we
* can simply not tell the I/O APIC code about them in this function.
* We also do not tell it about the BSP since it tells itself about
* the BSP internally to work with UP kernels and on UP machines.
*/
void
set_interrupt_apic_ids(void)
{
u_int i, apic_id;
for (i = 0; i < MAXCPU; i++) {
apic_id = cpu_apic_ids[i];
if (apic_id == -1)
continue;
if (cpu_info[apic_id].cpu_bsp)
continue;
if (cpu_info[apic_id].cpu_disabled)
continue;
if (intr_apic_id_limit >= 0 && apic_id > intr_apic_id_limit)
continue;
/* Don't let hyperthreads service interrupts. */
if (cpu_info[apic_id].cpu_hyperthread &&
!hyperthreading_intr_allowed)
continue;
intr_add_cpu(i);
}
}
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
#endif /* COUNT_XINVLTLB_HITS */
/*
* Init and startup IPI.
*/
void
ipi_startup(int apic_id, int vector)
{
/*
* This attempts to follow the algorithm described in the
* Intel Multiprocessor Specification v1.4 in section B.4.
* For each IPI, we allow the local APIC ~20us to deliver the
* IPI. If that times out, we panic.
*/
/*
* first we do an INIT IPI: this INIT IPI might be run, resetting
* and running the target CPU. OR this INIT IPI might be latched (P5
* bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
* ignored.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
lapic_ipi_wait(100);
/* Explicitly deassert the INIT IPI. */
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
apic_id);
DELAY(10000); /* wait ~10mS */
/*
* next we do a STARTUP IPI: the previous INIT IPI might still be
* latched, (P5 bug) this 1st STARTUP would then terminate
* immediately, and the previously started INIT IPI would continue. OR
* the previous INIT IPI has already run. and this STARTUP IPI will
* run. OR the previous INIT IPI was ignored. and this STARTUP IPI
* will run.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
if (!lapic_ipi_wait(100))
panic("Failed to deliver first STARTUP IPI to APIC %d",
apic_id);
DELAY(200); /* wait ~200uS */
/*
* finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
* the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
* this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
* recognized after hardware RESET or INIT IPI.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
if (!lapic_ipi_wait(100))
panic("Failed to deliver second STARTUP IPI to APIC %d",
apic_id);
DELAY(200); /* wait ~200uS */
}
static bool
ipi_bitmap_set(int cpu, u_int ipi)
{
u_int bitmap, old, new;
u_int *cpu_bitmap;
bitmap = 1 << ipi;
cpu_bitmap = &cpuid_to_pcpu[cpu]->pc_ipi_bitmap;
old = *cpu_bitmap;
for (;;) {
if ((old & bitmap) != 0)
break;
new = old | bitmap;
if (atomic_fcmpset_int(cpu_bitmap, &old, new))
break;
}
return (old != 0);
}
/*
* Send an IPI to specified CPU handling the bitmap logic.
*/
static void
ipi_send_cpu(int cpu, u_int ipi)
{
KASSERT((u_int)cpu < MAXCPU && cpu_apic_ids[cpu] != -1,
("IPI to non-existent CPU %d", cpu));
if (IPI_IS_BITMAPED(ipi)) {
if (ipi_bitmap_set(cpu, ipi))
return;
ipi = IPI_BITMAP_VECTOR;
}
lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
}
void
ipi_bitmap_handler(struct trapframe frame)
{
struct trapframe *oldframe;
struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
kasan_mark(&frame, sizeof(frame), sizeof(frame), 0);
td = curthread;
ipi_bitmap = atomic_readandclear_int(&cpuid_to_pcpu[cpu]->
pc_ipi_bitmap);
/*
* sched_preempt() must be called to clear the pending preempt
* IPI to enable delivery of further preempts. However, the
* critical section will cause extra scheduler lock thrashing
* when used unconditionally. Only critical_enter() if
* hardclock must also run, which requires the section entry.
*/
if (ipi_bitmap & (1 << IPI_HARDCLOCK))
critical_enter();
td->td_intr_nesting_level++;
oldframe = td->td_intr_frame;
td->td_intr_frame = &frame;
#if defined(STACK) || defined(DDB)
if (ipi_bitmap & (1 << IPI_TRACE))
stack_capture_intr();
#endif
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
(*ipi_ast_counts[cpu])++;
#endif
/* Nothing to do for AST */
}
if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
hardclockintr();
}
td->td_intr_frame = oldframe;
td->td_intr_nesting_level--;
if (ipi_bitmap & (1 << IPI_HARDCLOCK))
critical_exit();
}
/*
* send an IPI to a set of cpus.
*/
void
ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &cpus);
CPU_FOREACH_ISSET(cpu, &cpus) {
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
}
/*
* send an IPI to a specific CPU.
*/
void
ipi_cpu(int cpu, u_int ipi)
{
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_SET_ATOMIC(cpu, &ipi_stop_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
/*
* send an IPI to all CPUs EXCEPT myself
*/
void
ipi_all_but_self(u_int ipi)
{
cpuset_t other_cpus;
int cpu, c;
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD) {
other_cpus = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &other_cpus);
CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus);
}
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
if (IPI_IS_BITMAPED(ipi)) {
cpu = PCPU_GET(cpuid);
CPU_FOREACH(c) {
if (c != cpu)
ipi_bitmap_set(c, ipi);
}
ipi = IPI_BITMAP_VECTOR;
}
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
void
ipi_self_from_nmi(u_int vector)
{
lapic_ipi_vectored(vector, APIC_IPI_DEST_SELF);
/* Wait for IPI to finish. */
if (!lapic_ipi_wait(50000)) {
if (KERNEL_PANICKED())
return;
else
panic("APIC: IPI is stuck");
}
}
int
ipi_nmi_handler(void)
{
u_int cpuid;
/*
* As long as there is no simple way to know about an NMI's
* source, if the bitmask for the current CPU is present in
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &ipi_stop_nmi_pending))
return (1);
CPU_CLR_ATOMIC(cpuid, &ipi_stop_nmi_pending);
cpustop_handler();
return (0);
}
int nmi_kdb_lock;
void
nmi_call_kdb_smp(u_int type, struct trapframe *frame)
{
int cpu;
bool call_post;
cpu = PCPU_GET(cpuid);
if (atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
nmi_call_kdb(cpu, type, frame);
call_post = false;
} else {
savectx(&stoppcbs[cpu]);
CPU_SET_ATOMIC(cpu, &stopped_cpus);
while (!atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1))
ia32_pause();
call_post = true;
}
atomic_store_rel_int(&nmi_kdb_lock, 0);
if (call_post)
cpustop_handler_post(cpu);
}
/*
* Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
* if available) until we are resumed.
*/
void
cpustop_handler(void)
{
struct monitorbuf *mb;
u_int cpu;
bool use_mwait;
cpu = PCPU_GET(cpuid);
savectx(&stoppcbs[cpu]);
use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 &&
!mwait_cpustop_broken);
if (use_mwait) {
mb = PCPU_PTR(monitorbuf);
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_STOPPED);
}
/* Indicate that we are stopped */
CPU_SET_ATOMIC(cpu, &stopped_cpus);
/* Wait for restart */
while (!CPU_ISSET(cpu, &started_cpus)) {
if (use_mwait) {
cpu_monitor(mb, 0, 0);
if (atomic_load_int(&mb->stop_state) ==
MONITOR_STOPSTATE_STOPPED)
cpu_mwait(0, MWAIT_C1);
continue;
}
ia32_pause();
/*
* Halt non-BSP CPUs on panic -- we're never going to need them
* again, and might as well save power / release resources
* (e.g., overprovisioned VM infrastructure).
*/
while (__predict_false(!IS_BSP() && KERNEL_PANICKED()))
halt();
}
cpustop_handler_post(cpu);
}
static void
cpustop_handler_post(u_int cpu)
{
CPU_CLR_ATOMIC(cpu, &started_cpus);
CPU_CLR_ATOMIC(cpu, &stopped_cpus);
/*
* We don't broadcast TLB invalidations to other CPUs when they are
* stopped. Hence, we clear the TLB before resuming.
*/
invltlb_glob();
#if defined(__amd64__) && (defined(DDB) || defined(GDB))
amd64_db_resume_dbreg();
#endif
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
cpustop_restartfunc = NULL;
}
}
/*
* Handle an IPI_SUSPEND by saving our current context and spinning until we
* are resumed.
*/
void
cpususpend_handler(void)
{
u_int cpu;
mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
cpu = PCPU_GET(cpuid);
#ifdef XENHVM
/*
* Some Xen guest types (PVH) expose a very minimal set of ACPI tables,
* and for example have no support for SCI. That leads to the suspend
* stacks not being allocated, and hence when attempting to perform a
* Xen triggered suspension FreeBSD will hit a #PF. Avoid saving the
* CPU and FPU contexts if the stacks are not allocated, as the
* hypervisor will already take care of this. Note that we could even
* do this for Xen triggered suspensions on guests that have full ACPI
* support, but doing so would introduce extra complexity.
*/
if (susppcbs == NULL) {
KASSERT(vm_guest == VM_GUEST_XEN, ("Missing suspend stack"));
CPU_SET_ATOMIC(cpu, &suspended_cpus);
CPU_SET_ATOMIC(cpu, &resuming_cpus);
} else
#endif
if (savectx(&susppcbs[cpu]->sp_pcb)) {
#ifdef __amd64__
fpususpend(susppcbs[cpu]->sp_fpususpend);
#else
npxsuspend(susppcbs[cpu]->sp_fpususpend);
#endif
/*
* suspended_cpus is cleared shortly after each AP is restarted
* by a Startup IPI, so that the BSP can proceed to restarting
* the next AP.
*
* resuming_cpus gets cleared when the AP completes
* initialization after having been released by the BSP.
* resuming_cpus is probably not the best name for the
* variable, because it is actually a set of processors that
* haven't resumed yet and haven't necessarily started resuming.
*
* Note that suspended_cpus is meaningful only for ACPI suspend
* as it's not really used for Xen suspend since the APs are
* automatically restored to the running state and the correct
* context. For the same reason resumectx is never called in
* that case.
*/
CPU_SET_ATOMIC(cpu, &suspended_cpus);
CPU_SET_ATOMIC(cpu, &resuming_cpus);
/*
* Invalidate the cache after setting the global status bits.
* The last AP to set its bit may end up being an Owner of the
* corresponding cache line in MOESI protocol. The AP may be
* stopped before the cache line is written to the main memory.
*/
wbinvd();
} else {
#ifdef __amd64__
fpuresume(susppcbs[cpu]->sp_fpususpend);
#else
npxresume(susppcbs[cpu]->sp_fpususpend);
#endif
pmap_init_pat();
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
/* Indicate that we have restarted and restored the context. */
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/* Wait for resume directive */
while (!CPU_ISSET(cpu, &toresume_cpus))
ia32_pause();
/* Re-apply microcode updates. */
ucode_reload();
#ifdef __i386__
/* Finish removing the identity mapping of low memory for this AP. */
invltlb_glob();
#endif
if (cpu_ops.cpu_resume)
cpu_ops.cpu_resume();
#ifdef __amd64__
if (vmm_resume_p)
vmm_resume_p();
#endif
/* Resume MCA and local APIC */
lapic_xapic_mode();
mca_resume();
lapic_setup(0);
/* Indicate that we are resumed */
CPU_CLR_ATOMIC(cpu, &resuming_cpus);
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}
/*
* Handle an IPI_SWI by waking delayed SWI thread.
*/
void
ipi_swi_handler(struct trapframe frame)
{
intr_event_handle(clk_intr_event, &frame);
}
/*
* This is called once the rest of the system is up and running and we're
* ready to let the APs out of the pen.
*/
static void
release_aps(void *dummy __unused)
{
if (mp_ncpus == 1)
return;
atomic_store_rel_int(&aps_ready, 1);
while (smp_started == 0)
ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
#ifdef COUNT_IPIS
/*
* Setup interrupt counters for IPI handlers.
*/
static void
mp_ipi_intrcnt(void *dummy)
{
char buf[64];
int i;
CPU_FOREACH(i) {
snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
intrcnt_add(buf, &ipi_invltlb_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
intrcnt_add(buf, &ipi_invlrng_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
intrcnt_add(buf, &ipi_invlpg_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
intrcnt_add(buf, &ipi_invlcache_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
intrcnt_add(buf, &ipi_preempt_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:ast", i);
intrcnt_add(buf, &ipi_ast_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
intrcnt_add(buf, &ipi_rendezvous_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif
