Index: head/sys/conf/NOTES =================================================================== --- head/sys/conf/NOTES (revision 282640) +++ head/sys/conf/NOTES (revision 282641) @@ -1,2984 +1,2985 @@ # $FreeBSD$ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers', # 'makeoptions', 'hints', etc. go into the kernel configuration that you # run config(8) with. # # Lines that begin with 'hint.' are NOT for config(8), they go into your # hints file. See /boot/device.hints and/or the 'hints' config(8) directive. # # Please use ``make LINT'' to create an old-style LINT file if you want to # do kernel test-builds. # # This file contains machine independent kernel configuration notes. For # machine dependent notes, look in /sys//conf/NOTES. # # # NOTES conventions and style guide: # # Large block comments should begin and end with a line containing only a # comment character. # # To describe a particular object, a block comment (if it exists) should # come first. Next should come device, options, and hints lines in that # order. All device and option lines must be described by a comment that # doesn't just expand the device or option name. Use only a concise # comment on the same line if possible. Very detailed descriptions of # devices and subsystems belong in man pages. # # A space followed by a tab separates 'options' from an option name. Two # spaces followed by a tab separate 'device' from a device name. Comments # after an option or device should use one space after the comment character. # To comment out a negative option that disables code and thus should not be # enabled for LINT builds, precede 'options' with "#!". # # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a formula defined in subr_param.c. # Omitting this parameter or setting it to 0 will cause the system to # auto-size based on physical memory. # maxusers 10 # To statically compile in device wiring instead of /boot/device.hints #hints "LINT.hints" # Default places to look for devices. # Use the following to compile in values accessible to the kernel # through getenv() (or kenv(1) in userland). The format of the file # is 'variable=value', see kenv(1) # #env "LINT.env" # # The `makeoptions' parameter allows variables to be passed to the # generated Makefile in the build area. # # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS} # after most other flags. Here we use it to inhibit use of non-optimal # gcc built-in functions (e.g., memcmp). # # DEBUG happens to be magic. # The following is equivalent to 'config -g KERNELNAME' and creates # 'kernel.debug' compiled with -g debugging as well as a normal # 'kernel'. Use 'make install.debug' to install the debug kernel # but that isn't normally necessary as the debug symbols are not loaded # by the kernel and are not useful there anyway. # # KERNEL can be overridden so that you can change the default name of your # kernel. # # MODULES_OVERRIDE can be used to limit modules built to a specific list. # makeoptions CONF_CFLAGS=-fno-builtin #Don't allow use of memcmp, etc. #makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols #makeoptions KERNEL=foo #Build kernel "foo" and install "/foo" # Only build ext2fs module plus those parts of the sound system I need. #makeoptions MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3" makeoptions DESTDIR=/tmp # # FreeBSD processes are subject to certain limits to their consumption # of system resources. See getrlimit(2) for more details. Each # resource limit has two values, a "soft" limit and a "hard" limit. # The soft limits can be modified during normal system operation, but # the hard limits are set at boot time. Their default values are # in sys//include/vmparam.h. There are two ways to change them: # # 1. Set the values at kernel build time. The options below are one # way to allow that limit to grow to 1GB. They can be increased # further by changing the parameters: # # 2. In /boot/loader.conf, set the tunables kern.maxswzone, # kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz, # kern.dflssiz, kern.maxssiz and kern.sgrowsiz. # # The options in /boot/loader.conf override anything in the kernel # configuration file. See the function init_param1 in # sys/kern/subr_param.c for more details. # options MAXDSIZ=(1024UL*1024*1024) options MAXSSIZ=(128UL*1024*1024) options DFLDSIZ=(1024UL*1024*1024) # # BLKDEV_IOSIZE sets the default block size used in user block # device I/O. Note that this value will be overridden by the label # when specifying a block device from a label with a non-0 # partition blocksize. The default is PAGE_SIZE. # options BLKDEV_IOSIZE=8192 # # MAXPHYS and DFLTPHYS # # These are the maximal and safe 'raw' I/O block device access sizes. # Reads and writes will be split into MAXPHYS chunks for known good # devices and DFLTPHYS for the rest. Some applications have better # performance with larger raw I/O access sizes. Note that certain VM # parameters are derived from these values and making them too large # can make an unbootable kernel. # # The defaults are 64K and 128K respectively. options DFLTPHYS=(64*1024) options MAXPHYS=(128*1024) # This allows you to actually store this configuration file into # the kernel binary itself. See config(8) for more details. # options INCLUDE_CONFIG_FILE # Include this file in kernel # # Compile-time defaults for various boot parameters # options BOOTVERBOSE=1 options BOOTHOWTO=RB_MULTIPLE options GEOM_AES # Don't use, use GEOM_BDE options GEOM_BDE # Disk encryption. options GEOM_BSD # BSD disklabels options GEOM_CACHE # Disk cache. options GEOM_CONCAT # Disk concatenation. options GEOM_ELI # Disk encryption. options GEOM_FOX # Redundant path mitigation options GEOM_GATE # Userland services. options GEOM_JOURNAL # Journaling. options GEOM_LABEL # Providers labelization. options GEOM_LINUX_LVM # Linux LVM2 volumes options GEOM_MBR # DOS/MBR partitioning options GEOM_MIRROR # Disk mirroring. options GEOM_MULTIPATH # Disk multipath options GEOM_NOP # Test class. options GEOM_PART_APM # Apple partitioning options GEOM_PART_BSD # BSD disklabel options GEOM_PART_BSD64 # BSD disklabel64 options GEOM_PART_EBR # Extended Boot Records options GEOM_PART_EBR_COMPAT # Backward compatible partition names options GEOM_PART_GPT # GPT partitioning options GEOM_PART_LDM # Logical Disk Manager options GEOM_PART_MBR # MBR partitioning options GEOM_PART_PC98 # PC-9800 disk partitioning options GEOM_PART_VTOC8 # SMI VTOC8 disk label options GEOM_PC98 # NEC PC9800 partitioning options GEOM_RAID # Soft RAID functionality. options GEOM_RAID3 # RAID3 functionality. options GEOM_SHSEC # Shared secret. options GEOM_STRIPE # Disk striping. options GEOM_SUNLABEL # Sun/Solaris partitioning options GEOM_UZIP # Read-only compressed disks options GEOM_VINUM # Vinum logical volume manager options GEOM_VIRSTOR # Virtual storage. options GEOM_VOL # Volume names from UFS superblock options GEOM_ZERO # Performance testing helper. # # The root device and filesystem type can be compiled in; # this provides a fallback option if the root device cannot # be correctly guessed by the bootstrap code, or an override if # the RB_DFLTROOT flag (-r) is specified when booting the kernel. # options ROOTDEVNAME=\"ufs:da0s2e\" ##################################################################### # Scheduler options: # # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory. These options # select which scheduler is compiled in. # # SCHED_4BSD is the historical, proven, BSD scheduler. It has a global run # queue and no CPU affinity which makes it suboptimal for SMP. It has very # good interactivity and priority selection. # # SCHED_ULE provides significant performance advantages over 4BSD on many # workloads on SMP machines. It supports cpu-affinity, per-cpu runqueues # and scheduler locks. It also has a stronger notion of interactivity # which leads to better responsiveness even on uniprocessor machines. This # is the default scheduler. # # SCHED_STATS is a debugging option which keeps some stats in the sysctl # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions. # options SCHED_4BSD options SCHED_STATS #options SCHED_ULE ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # Mandatory: options SMP # Symmetric MultiProcessor Kernel # MAXCPU defines the maximum number of CPUs that can boot in the system. # A default value should be already present, for every architecture. options MAXCPU=32 # MAXMEMDOM defines the maximum number of memory domains that can boot in the # system. A default value should already be defined by every architecture. options MAXMEMDOM=1 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin # if the thread that currently owns the mutex is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_MUTEXES # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin # if the thread that currently owns the rwlock is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_RWLOCKS # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread that # currently owns the sx lock is executing on another CPU. # This behavior is enabled by default, so this option can be used to # disable it. options NO_ADAPTIVE_SX # MUTEX_NOINLINE forces mutex operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options MUTEX_NOINLINE # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options RWLOCK_NOINLINE # SX_NOINLINE forces sx lock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options SX_NOINLINE # SMP Debugging Options: # # CALLOUT_PROFILING enables rudimentary profiling of the callwheel data # structure used as backend in callout(9). # PREEMPTION allows the threads that are in the kernel to be preempted by # higher priority [interrupt] threads. It helps with interactivity # and allows interrupt threads to run sooner rather than waiting. # WARNING! Only tested on amd64 and i386. # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel # threads. Its sole use is to expose race conditions and other # bugs during development. Enabling this option will reduce # performance and increase the frequency of kernel panics by # design. If you aren't sure that you need it then you don't. # Relies on the PREEMPTION option. DON'T TURN THIS ON. # MUTEX_DEBUG enables various extra assertions in the mutex code. # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table # used to hold active sleep queues as well as sleep wait message # frequency. # TURNSTILE_PROFILING enables rudimentary profiling of the hash table # used to hold active lock queues. # UMTX_PROFILING enables rudimentary profiling of the hash table used to hold active lock queues. # WITNESS enables the witness code which detects deadlocks and cycles # during locking operations. # WITNESS_KDB causes the witness code to drop into the kernel debugger if # a lock hierarchy violation occurs or if locks are held when going to # sleep. # WITNESS_SKIPSPIN disables the witness checks on spin mutexes. options PREEMPTION options FULL_PREEMPTION options MUTEX_DEBUG options WITNESS options WITNESS_KDB options WITNESS_SKIPSPIN # LOCK_PROFILING - Profiling locks. See LOCK_PROFILING(9) for details. options LOCK_PROFILING # Set the number of buffers and the hash size. The hash size MUST be larger # than the number of buffers. Hash size should be prime. options MPROF_BUFFERS="1536" options MPROF_HASH_SIZE="1543" # Profiling for the callout(9) backend. options CALLOUT_PROFILING # Profiling for internal hash tables. options SLEEPQUEUE_PROFILING options TURNSTILE_PROFILING options UMTX_PROFILING ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. Note that some architectures that # are supported by FreeBSD do not include support for certain important # aspects of this compatibility option, namely those related to the # signal delivery mechanism. # options COMPAT_43 # Old tty interface. options COMPAT_43TTY # Note that as a general rule, COMPAT_FREEBSD depends on # COMPAT_FREEBSD, COMPAT_FREEBSD, etc. # Enable FreeBSD4 compatibility syscalls options COMPAT_FREEBSD4 # Enable FreeBSD5 compatibility syscalls options COMPAT_FREEBSD5 # Enable FreeBSD6 compatibility syscalls options COMPAT_FREEBSD6 # Enable FreeBSD7 compatibility syscalls options COMPAT_FREEBSD7 # Enable FreeBSD9 compatibility syscalls options COMPAT_FREEBSD9 # Enable FreeBSD10 compatibility syscalls options COMPAT_FREEBSD10 # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG ##################################################################### # DEBUGGING OPTIONS # # Compile with kernel debugger related code. # options KDB # # Print a stack trace of the current thread on the console for a panic. # options KDB_TRACE # # Don't enter the debugger for a panic. Intended for unattended operation # where you may want to enter the debugger from the console, but still want # the machine to recover from a panic. # options KDB_UNATTENDED # # Enable the ddb debugger backend. # options DDB # # Print the numerical value of symbols in addition to the symbolic # representation. # options DDB_NUMSYM # # Enable the remote gdb debugger backend. # options GDB # # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the # contents of the registered sysctl nodes on the console. It is disabled by # default because it generates excessively verbose console output that can # interfere with serial console operation. # options SYSCTL_DEBUG # # Enable textdump by default, this disables kernel core dumps. # options TEXTDUMP_PREFERRED # # Enable extra debug messages while performing textdumps. # options TEXTDUMP_VERBOSE # # NO_SYSCTL_DESCR omits the sysctl node descriptions to save space in the # resulting kernel. options NO_SYSCTL_DESCR # # MALLOC_DEBUG_MAXZONES enables multiple uma zones for malloc(9) # allocations that are smaller than a page. The purpose is to isolate # different malloc types into hash classes, so that any buffer # overruns or use-after-free will usually only affect memory from # malloc types in that hash class. This is purely a debugging tool; # by varying the hash function and tracking which hash class was # corrupted, the intersection of the hash classes from each instance # will point to a single malloc type that is being misused. At this # point inspection or memguard(9) can be used to catch the offending # code. # options MALLOC_DEBUG_MAXZONES=8 # # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator # for the kernel used to detect modify-after-free scenarios. See the # memguard(9) man page for more information on usage. # options DEBUG_MEMGUARD # # DEBUG_REDZONE enables buffer underflows and buffer overflows detection for # malloc(9). # options DEBUG_REDZONE # # EARLY_PRINTF enables support for calling a special printf (eprintf) # very early in the kernel (before cn_init() has been called). This # should only be used for debugging purposes early in boot. Normally, # it is not defined. It is commented out here because this feature # isn't generally available. And the required eputc() isn't defined. # #options EARLY_PRINTF # # KTRACE enables the system-call tracing facility ktrace(2). To be more # SMP-friendly, KTRACE uses a worker thread to process most trace events # asynchronously to the thread generating the event. This requires a # pre-allocated store of objects representing trace events. The # KTRACE_REQUEST_POOL option specifies the initial size of this store. # The size of the pool can be adjusted both at boottime and runtime via # the kern.ktrace_request_pool tunable and sysctl. # options KTRACE #kernel tracing options KTRACE_REQUEST_POOL=101 # # KTR is a kernel tracing facility imported from BSD/OS. It is # enabled with the KTR option. KTR_ENTRIES defines the number of # entries in the circular trace buffer; it may be an arbitrary number. # KTR_BOOT_ENTRIES defines the number of entries during the early boot, # before malloc(9) is functional. # KTR_COMPILE defines the mask of events to compile into the kernel as # defined by the KTR_* constants in . KTR_MASK defines the # initial value of the ktr_mask variable which determines at runtime # what events to trace. KTR_CPUMASK determines which CPU's log # events, with bit X corresponding to CPU X. The layout of the string # passed as KTR_CPUMASK must match a series of bitmasks each of them # separated by the "," character (ie: # KTR_CPUMASK=0xAF,0xFFFFFFFFFFFFFFFF). KTR_VERBOSE enables # dumping of KTR events to the console by default. This functionality # can be toggled via the debug.ktr_verbose sysctl and defaults to off # if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details. # options KTR options KTR_BOOT_ENTRIES=1024 options KTR_ENTRIES=(128*1024) options KTR_COMPILE=(KTR_ALL) options KTR_MASK=KTR_INTR options KTR_CPUMASK=0x3 options KTR_VERBOSE # # ALQ(9) is a facility for the asynchronous queuing of records from the kernel # to a vnode, and is employed by services such as ktr(4) to produce trace # files based on a kernel event stream. Records are written asynchronously # in a worker thread. # options ALQ options KTR_ALQ # # The INVARIANTS option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options INVARIANTS # # The INVARIANT_SUPPORT option makes us compile in support for # verifying some of the internal structures. It is a prerequisite for # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be # called. The intent is that you can set 'INVARIANTS' for single # source files (by changing the source file or specifying it on the # command line) if you have 'INVARIANT_SUPPORT' enabled. Also, if you # wish to build a kernel module with 'INVARIANTS', then adding # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary # infrastructure without the added overhead. # options INVARIANT_SUPPORT # # The DIAGNOSTIC option is used to enable extra debugging information # from some parts of the kernel. As this makes everything more noisy, # it is disabled by default. # options DIAGNOSTIC # # REGRESSION causes optional kernel interfaces necessary only for regression # testing to be enabled. These interfaces may constitute security risks # when enabled, as they permit processes to easily modify aspects of the # run-time environment to reproduce unlikely or unusual (possibly normally # impossible) scenarios. # options REGRESSION # # This option lets some drivers co-exist that can't co-exist in a running # system. This is used to be able to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes it name # from.) # options COMPILING_LINT # # STACK enables the stack(9) facility, allowing the capture of kernel stack # for the purpose of procinfo(1), etc. stack(9) will also be compiled in # automatically if DDB(4) is compiled into the kernel. # options STACK ##################################################################### # PERFORMANCE MONITORING OPTIONS # # The hwpmc driver that allows the use of in-CPU performance monitoring # counters for performance monitoring. The base kernel needs to be configured # with the 'options' line, while the hwpmc device can be either compiled # in or loaded as a loadable kernel module. # # Additional configuration options may be required on specific architectures, # please see hwpmc(4). device hwpmc # Driver (also a loadable module) +options HWPMC_DEBUG options HWPMC_HOOKS # Other necessary kernel hooks ##################################################################### # NETWORKING OPTIONS # # Protocol families # options INET #Internet communications protocols options INET6 #IPv6 communications protocols options ROUTETABLES=2 # allocated fibs up to 65536. default is 1. # but that would be a bad idea as they are large. options TCP_OFFLOAD # TCP offload support. # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration options IPSEC #IP security (requires device crypto) #options IPSEC_DEBUG #debug for IP security # # #DEPRECATED# # Set IPSEC_FILTERTUNNEL to change the default of the sysctl to force packets # coming through a tunnel to be processed by any configured packet filtering # twice. The default is that packets coming out of a tunnel are _not_ processed; # they are assumed trusted. # # IPSEC history is preserved for such packets, and can be filtered # using ipfw(8)'s 'ipsec' keyword, when this option is enabled. # #options IPSEC_FILTERTUNNEL #filter ipsec packets from a tunnel # # Set IPSEC_NAT_T to enable NAT-Traversal support. This enables # optional UDP encapsulation of ESP packets. # options IPSEC_NAT_T #NAT-T support, UDP encap of ESP # # SMB/CIFS requester # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV # options. options NETSMB #SMB/CIFS requester # mchain library. It can be either loaded as KLD or compiled into kernel options LIBMCHAIN # libalias library, performing NAT options LIBALIAS # flowtable cache options FLOWTABLE # # SCTP is a NEW transport protocol defined by # RFC2960 updated by RFC3309 and RFC3758.. and # soon to have a new base RFC and many many more # extensions. This release supports all the extensions # including many drafts (most about to become RFC's). # It is the reference implementation of SCTP # and is quite well tested. # # Note YOU MUST have both INET and INET6 defined. # You don't have to enable V6, but SCTP is # dual stacked and so far we have not torn apart # the V6 and V4.. since an association can span # both a V6 and V4 address at the SAME time :-) # options SCTP # There are bunches of options: # this one turns on all sorts of # nastily printing that you can # do. It's all controlled by a # bit mask (settable by socket opt and # by sysctl). Including will not cause # logging until you set the bits.. but it # can be quite verbose.. so without this # option we don't do any of the tests for # bits and prints.. which makes the code run # faster.. if you are not debugging don't use. options SCTP_DEBUG # # This option turns off the CRC32c checksum. Basically, # you will not be able to talk to anyone else who # has not done this. Its more for experimentation to # see how much CPU the CRC32c really takes. Most new # cards for TCP support checksum offload.. so this # option gives you a "view" into what SCTP would be # like with such an offload (which only exists in # high in iSCSI boards so far). With the new # splitting 8's algorithm its not as bad as it used # to be.. but it does speed things up try only # for in a captured lab environment :-) options SCTP_WITH_NO_CSUM # # # All that options after that turn on specific types of # logging. You can monitor CWND growth, flight size # and all sorts of things. Go look at the code and # see. I have used this to produce interesting # charts and graphs as well :-> # # I have not yet committed the tools to get and print # the logs, I will do that eventually .. before then # if you want them send me an email rrs@freebsd.org # You basically must have ktr(4) enabled for these # and you then set the sysctl to turn on/off various # logging bits. Use ktrdump(8) to pull the log and run # it through a display program.. and graphs and other # things too. # options SCTP_LOCK_LOGGING options SCTP_MBUF_LOGGING options SCTP_MBCNT_LOGGING options SCTP_PACKET_LOGGING options SCTP_LTRACE_CHUNKS options SCTP_LTRACE_ERRORS # altq(9). Enable the base part of the hooks with the ALTQ option. # Individual disciplines must be built into the base system and can not be # loaded as modules at this point. ALTQ requires a stable TSC so if yours is # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC # option. options ALTQ options ALTQ_CBQ # Class Based Queueing options ALTQ_RED # Random Early Detection options ALTQ_RIO # RED In/Out options ALTQ_HFSC # Hierarchical Packet Scheduler options ALTQ_CDNR # Traffic conditioner options ALTQ_PRIQ # Priority Queueing options ALTQ_NOPCC # Required if the TSC is unusable options ALTQ_DEBUG # netgraph(4). Enable the base netgraph code with the NETGRAPH option. # Individual node types can be enabled with the corresponding option # listed below; however, this is not strictly necessary as netgraph # will automatically load the corresponding KLD module if the node type # is not already compiled into the kernel. Each type below has a # corresponding man page, e.g., ng_async(8). options NETGRAPH # netgraph(4) system options NETGRAPH_DEBUG # enable extra debugging, this # affects netgraph(4) and nodes # Node types options NETGRAPH_ASYNC options NETGRAPH_ATMLLC options NETGRAPH_ATM_ATMPIF options NETGRAPH_BLUETOOTH # ng_bluetooth(4) options NETGRAPH_BLUETOOTH_BT3C # ng_bt3c(4) options NETGRAPH_BLUETOOTH_HCI # ng_hci(4) options NETGRAPH_BLUETOOTH_L2CAP # ng_l2cap(4) options NETGRAPH_BLUETOOTH_SOCKET # ng_btsocket(4) options NETGRAPH_BLUETOOTH_UBT # ng_ubt(4) options NETGRAPH_BLUETOOTH_UBTBCMFW # ubtbcmfw(4) options NETGRAPH_BPF options NETGRAPH_BRIDGE options NETGRAPH_CAR options NETGRAPH_CISCO options NETGRAPH_DEFLATE options NETGRAPH_DEVICE options NETGRAPH_ECHO options NETGRAPH_EIFACE options NETGRAPH_ETHER options NETGRAPH_FRAME_RELAY options NETGRAPH_GIF options NETGRAPH_GIF_DEMUX options NETGRAPH_HOLE options NETGRAPH_IFACE options NETGRAPH_IP_INPUT options NETGRAPH_IPFW options NETGRAPH_KSOCKET options NETGRAPH_L2TP options NETGRAPH_LMI # MPPC compression requires proprietary files (not included) #options NETGRAPH_MPPC_COMPRESSION options NETGRAPH_MPPC_ENCRYPTION options NETGRAPH_NETFLOW options NETGRAPH_NAT options NETGRAPH_ONE2MANY options NETGRAPH_PATCH options NETGRAPH_PIPE options NETGRAPH_PPP options NETGRAPH_PPPOE options NETGRAPH_PPTPGRE options NETGRAPH_PRED1 options NETGRAPH_RFC1490 options NETGRAPH_SOCKET options NETGRAPH_SPLIT options NETGRAPH_SPPP options NETGRAPH_TAG options NETGRAPH_TCPMSS options NETGRAPH_TEE options NETGRAPH_UI options NETGRAPH_VJC options NETGRAPH_VLAN # NgATM - Netgraph ATM options NGATM_ATM options NGATM_ATMBASE options NGATM_SSCOP options NGATM_SSCFU options NGATM_UNI options NGATM_CCATM device mn # Munich32x/Falc54 Nx64kbit/sec cards. # Network stack virtualization. #options VIMAGE #options VNET_DEBUG # debug for VIMAGE # # Network interfaces: # The `loop' device is MANDATORY when networking is enabled. device loop # The `ether' device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured or token-ring is enabled. device ether # The `vlan' device implements the VLAN tagging of Ethernet frames # according to IEEE 802.1Q. device vlan # The `vxlan' device implements the VXLAN encapsulation of Ethernet # frames in UDP packets according to RFC7348. device vxlan # The `wlan' device provides generic code to support 802.11 # drivers, including host AP mode; it is MANDATORY for the wi, # and ath drivers and will eventually be required by all 802.11 drivers. device wlan options IEEE80211_DEBUG #enable debugging msgs options IEEE80211_AMPDU_AGE #age frames in AMPDU reorder q's options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support options IEEE80211_SUPPORT_TDMA #enable TDMA support # The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide # support for WEP, TKIP, and AES-CCMP crypto protocols optionally # used with 802.11 devices that depend on the `wlan' module. device wlan_wep device wlan_ccmp device wlan_tkip # The `wlan_xauth' device provides support for external (i.e. user-mode) # authenticators for use with 802.11 drivers that use the `wlan' # module and support 802.1x and/or WPA security protocols. device wlan_xauth # The `wlan_acl' device provides a MAC-based access control mechanism # for use with 802.11 drivers operating in ap mode and using the # `wlan' module. # The 'wlan_amrr' device provides AMRR transmit rate control algorithm device wlan_acl device wlan_amrr # Generic TokenRing device token # The `fddi' device provides generic code to support FDDI. device fddi # The `arcnet' device provides generic code to support Arcnet. device arcnet # The `sppp' device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). device sppp # The `bpf' device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. DHCP requires bpf. device bpf # The `netmap' device implements memory-mapped access to network # devices from userspace, enabling wire-speed packet capture and # generation even at 10Gbit/s. Requires support in the device # driver. Supported drivers are ixgbe, e1000, re. device netmap # The `disc' device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing and benchmarking purposes. device disc # The `epair' device implements a virtual back-to-back connected Ethernet # like interface pair. device epair # The `edsc' device implements a minimal Ethernet interface, # which discards all packets sent and receives none. device edsc # The `tap' device is a pty-like virtual Ethernet interface device tap # The `tun' device implements (user-)ppp and nos-tun(8) device tun # The `gif' device implements IPv6 over IP4 tunneling, # IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and # IPv6 over IPv6 tunneling. # The `gre' device implements GRE (Generic Routing Encapsulation) tunneling, # as specified in the RFC 2784 and RFC 2890. # The `me' device implements Minimal Encapsulation within IPv4 as # specified in the RFC 2004. # The XBONEHACK option allows the same pair of addresses to be configured on # multiple gif interfaces. device gif device gre device me options XBONEHACK # The `stf' device implements 6to4 encapsulation. device stf # The pf packet filter consists of three devices: # The `pf' device provides /dev/pf and the firewall code itself. # The `pflog' device provides the pflog0 interface which logs packets. # The `pfsync' device provides the pfsync0 interface used for # synchronization of firewall state tables (over the net). device pf device pflog device pfsync # Bridge interface. device if_bridge # Common Address Redundancy Protocol. See carp(4) for more details. device carp # IPsec interface. device enc # Link aggregation interface. device lagg # # Internet family options: # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted and XORP. # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall_type=open # in /etc/rc.conf when first enabling this feature, then refining the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care, if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''. It # depends on IPFIREWALL if compiled into the kernel. # # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires # LIBALIAS. # # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding # packets without touching the TTL). This can be useful to hide firewalls # from traceroute and similar tools. # # PF_DEFAULT_TO_DROP causes the default pf(4) rule to deny everything. # # TCPDEBUG enables code which keeps traces of the TCP state machine # for sockets with the SO_DEBUG option set, which can then be examined # using the trpt(8) utility. # # RADIX_MPATH provides support for equal-cost multi-path routing. # options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #enable logging to syslogd(8) options IPFIREWALL_VERBOSE_LIMIT=100 #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPFIREWALL_NAT #ipfw kernel nat support options IPDIVERT #divert sockets options IPFILTER #ipfilter support options IPFILTER_LOG #ipfilter logging options IPFILTER_LOOKUP #ipfilter pools options IPFILTER_DEFAULT_BLOCK #block all packets by default options IPSTEALTH #support for stealth forwarding options PF_DEFAULT_TO_DROP #drop everything by default options TCPDEBUG options RADIX_MPATH # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions. See mbuf(9) for a list of available test cases. # MBUF_PROFILING enables code to profile the mbuf chains # exiting the system (via participating interfaces) and # return a logarithmic histogram of monitored parameters # (e.g. packet size, wasted space, number of mbufs in chain). options MBUF_STRESS_TEST options MBUF_PROFILING # Statically link in accept filters options ACCEPT_FILTER_DATA options ACCEPT_FILTER_DNS options ACCEPT_FILTER_HTTP # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are # carried in TCP option 19. This option is commonly used to protect # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable. # This is enabled on a per-socket basis using the TCP_MD5SIG socket option. # This requires the use of 'device crypto', 'options IPSEC' # or 'device cryptodev'. options TCP_SIGNATURE #include support for RFC 2385 # DUMMYNET enables the "dummynet" bandwidth limiter. You need IPFIREWALL # as well. See dummynet(4) and ipfw(8) for more info. When you run # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve # a smooth scheduling of the traffic. options DUMMYNET ##################################################################### # FILESYSTEM OPTIONS # # Only the root filesystem needs to be statically compiled or preloaded # as module; everything else will be automatically loaded at mount # time. Some people still prefer to statically compile other # filesystems as well. # # NB: The UNION filesystem was known to be buggy in the past. It is now # being actively maintained, although there are still some issues being # resolved. # # One of these is mandatory: options FFS #Fast filesystem options NFSCL #Network File System client # The rest are optional: options AUTOFS #Automounter filesystem options CD9660 #ISO 9660 filesystem options FDESCFS #File descriptor filesystem options FUSE #FUSE support module options MSDOSFS #MS DOS File System (FAT, FAT32) options NFSLOCKD #Network Lock Manager options NFSD #Network Filesystem Server options KGSSAPI #Kernel GSSAPI implementation options NULLFS #NULL filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options PSEUDOFS_TRACE #Debugging support for PSEUDOFS options SMBFS #SMB/CIFS filesystem options TMPFS #Efficient memory filesystem options UDF #Universal Disk Format options UNIONFS #Union filesystem # The xFS_ROOT options REQUIRE the associated ``options xFS'' options NFS_ROOT #NFS usable as root device # Soft updates is a technique for improving filesystem speed and # making abrupt shutdown less risky. # options SOFTUPDATES # Extended attributes allow additional data to be associated with files, # and is used for ACLs, Capabilities, and MAC labels. # See src/sys/ufs/ufs/README.extattr for more information. options UFS_EXTATTR options UFS_EXTATTR_AUTOSTART # Access Control List support for UFS filesystems. The current ACL # implementation requires extended attribute support, UFS_EXTATTR, # for the underlying filesystem. # See src/sys/ufs/ufs/README.acls for more information. options UFS_ACL # Directory hashing improves the speed of operations on very large # directories at the expense of some memory. options UFS_DIRHASH # Gjournal-based UFS journaling support. options UFS_GJOURNAL # Make space in the kernel for a root filesystem on a md device. # Define to the number of kilobytes to reserve for the filesystem. options MD_ROOT_SIZE=10 # Make the md device a potential root device, either with preloaded # images of type mfs_root or md_root. options MD_ROOT # Disk quotas are supported when this option is enabled. options QUOTA #enable disk quotas # If you are running a machine just as a fileserver for PC and MAC # users, using SAMBA, you may consider setting this option # and keeping all those users' directories on a filesystem that is # mounted with the suiddir option. This gives new files the same # ownership as the directory (similar to group). It's a security hole # if you let these users run programs, so confine it to file-servers # (but it'll save you lots of headaches in those cases). Root owned # directories are exempt and X bits are cleared. The suid bit must be # set on the directory as well; see chmod(1). PC owners can't see/set # ownerships so they keep getting their toes trodden on. This saves # you all the support calls as the filesystem it's used on will act as # they expect: "It's my dir so it must be my file". # options SUIDDIR # NFS options: options NFS_MINATTRTIMO=3 # VREG attrib cache timeout in sec options NFS_MAXATTRTIMO=60 options NFS_MINDIRATTRTIMO=30 # VDIR attrib cache timeout in sec options NFS_MAXDIRATTRTIMO=60 options NFS_DEBUG # Enable NFS Debugging # # Add support for the EXT2FS filesystem of Linux fame. Be a bit # careful with this - the ext2fs code has a tendency to lag behind # changes and not be exercised very much, so mounting read/write could # be dangerous (and even mounting read only could result in panics.) # options EXT2FS # # Add support for the ReiserFS filesystem (used in Linux). Currently, # this is limited to read-only access. # options REISERFS # Use real implementations of the aio_* system calls. There are numerous # stability and security issues in the current aio code that make it # unsuitable for inclusion on machines with untrusted local users. options VFS_AIO # Cryptographically secure random number generator; /dev/random device random # The system memory devices; /dev/mem, /dev/kmem device mem # The kernel symbol table device; /dev/ksyms device ksyms # Optional character code conversion support with LIBICONV. # Each option requires their base file system and LIBICONV. options CD9660_ICONV options MSDOSFS_ICONV options UDF_ICONV ##################################################################### # POSIX P1003.1B # Real time extensions added in the 1993 POSIX # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING options _KPOSIX_PRIORITY_SCHEDULING # p1003_1b_semaphores are very experimental, # user should be ready to assist in debugging if problems arise. options P1003_1B_SEMAPHORES # POSIX message queue options P1003_1B_MQUEUE ##################################################################### # SECURITY POLICY PARAMETERS # Support for BSM audit options AUDIT # Support for Mandatory Access Control (MAC): options MAC options MAC_BIBA options MAC_BSDEXTENDED options MAC_IFOFF options MAC_LOMAC options MAC_MLS options MAC_NONE options MAC_PARTITION options MAC_PORTACL options MAC_SEEOTHERUIDS options MAC_STUB options MAC_TEST # Support for Capsicum options CAPABILITIES # fine-grained rights on file descriptors options CAPABILITY_MODE # sandboxes with no global namespace access ##################################################################### # CLOCK OPTIONS # The granularity of operation is controlled by the kernel option HZ whose # default value (1000 on most architectures) means a granularity of 1ms # (1s/HZ). Historically, the default was 100, but finer granularity is # required for DUMMYNET and other systems on modern hardware. There are # reasonable arguments that HZ should, in fact, be 100 still; consider, # that reducing the granularity too much might cause excessive overhead in # clock interrupt processing, potentially causing ticks to be missed and thus # actually reducing the accuracy of operation. options HZ=100 # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp options PPS_SYNC # Enable support for generic feed-forward clocks in the kernel. # The feed-forward clock support is an alternative to the feedback oriented # ntpd/system clock approach, and is to be used with a feed-forward # synchronization algorithm such as the RADclock: # More info here: http://www.synclab.org/radclock options FFCLOCK ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # It is possible to wire down your SCSI devices so that a given bus, # target, and LUN always come on line as the same device unit. In # earlier versions the unit numbers were assigned in the order that # the devices were probed on the SCSI bus. This means that if you # removed a disk drive, you may have had to rewrite your /etc/fstab # file, and also that you had to be careful when adding a new disk # as it may have been probed earlier and moved your device configuration # around. (See also option GEOM_VOL for a different solution to this # problem.) # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "da3" then the first # non-wired disk will be assigned da4. # The syntax for wiring down devices is: hint.scbus.0.at="ahc0" hint.scbus.1.at="ahc1" hint.scbus.1.bus="0" hint.scbus.3.at="ahc2" hint.scbus.3.bus="0" hint.scbus.2.at="ahc2" hint.scbus.2.bus="1" hint.da.0.at="scbus0" hint.da.0.target="0" hint.da.0.unit="0" hint.da.1.at="scbus3" hint.da.1.target="1" hint.da.2.at="scbus2" hint.da.2.target="3" hint.sa.1.at="scbus1" hint.sa.1.target="6" # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The ch driver drives SCSI Media Changer ("jukebox") devices. # # The da driver drives SCSI Direct Access ("disk") and Optical Media # ("WORM") devices. # # The sa driver drives SCSI Sequential Access ("tape") devices. # # The cd driver drives SCSI Read Only Direct Access ("cd") devices. # # The ses driver drives SCSI Environment Services ("ses") and # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices. # # The pt driver drives SCSI Processor devices. # # The sg driver provides a passthrough API that is compatible with the # Linux SG driver. It will work in conjunction with the COMPAT_LINUX # option to run linux SG apps. It can also stand on its own and provide # source level API compatibility for porting apps to FreeBSD. # # Target Mode support is provided here but also requires that a SIM # (SCSI Host Adapter Driver) provide support as well. # # The targ driver provides target mode support as a Processor type device. # It exists to give the minimal context necessary to respond to Inquiry # commands. There is a sample user application that shows how the rest # of the command support might be done in /usr/share/examples/scsi_target. # # The targbh driver provides target mode support and exists to respond # to incoming commands that do not otherwise have a logical unit assigned # to them. # # The pass driver provides a passthrough API to access the CAM subsystem. device scbus #base SCSI code device ch #SCSI media changers device da #SCSI direct access devices (aka disks) device sa #SCSI tapes device cd #SCSI CD-ROMs device ses #Enclosure Services (SES and SAF-TE) device pt #SCSI processor device targ #SCSI Target Mode Code device targbh #SCSI Target Mode Blackhole Device device pass #CAM passthrough driver device sg #Linux SCSI passthrough device ctl #CAM Target Layer # CAM OPTIONS: # debugging options: # CAMDEBUG Compile in all possible debugging. # CAM_DEBUG_COMPILE Debug levels to compile in. # CAM_DEBUG_FLAGS Debug levels to enable on boot. # CAM_DEBUG_BUS Limit debugging to the given bus. # CAM_DEBUG_TARGET Limit debugging to the given target. # CAM_DEBUG_LUN Limit debugging to the given lun. # CAM_DEBUG_DELAY Delay in us after printing each debug line. # # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter) # queue after a bus reset, and the number of milliseconds to # freeze the device queue after a bus device reset. This # can be changed at boot and runtime with the # kern.cam.scsi_delay tunable/sysctl. options CAMDEBUG options CAM_DEBUG_COMPILE=-1 options CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_PROBE|CAM_DEBUG_PERIPH) options CAM_DEBUG_BUS=-1 options CAM_DEBUG_TARGET=-1 options CAM_DEBUG_LUN=-1 options CAM_DEBUG_DELAY=1 options CAM_MAX_HIGHPOWER=4 options SCSI_NO_SENSE_STRINGS options SCSI_NO_OP_STRINGS options SCSI_DELAY=5000 # Be pessimistic about Joe SCSI device # Options for the CAM CDROM driver: # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only # enforced if there is I/O waiting for another LUN # The compiled in defaults for these variables are 2 and 10 seconds, # respectively. # # These can also be changed on the fly with the following sysctl variables: # kern.cam.cd.changer.min_busy_seconds # kern.cam.cd.changer.max_busy_seconds # options CHANGER_MIN_BUSY_SECONDS=2 options CHANGER_MAX_BUSY_SECONDS=10 # Options for the CAM sequential access driver: # SA_IO_TIMEOUT: Timeout for read/write/wfm operations, in minutes # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT. options SA_IO_TIMEOUT=4 options SA_SPACE_TIMEOUT=60 options SA_REWIND_TIMEOUT=(2*60) options SA_ERASE_TIMEOUT=(4*60) options SA_1FM_AT_EOD # Optional timeout for the CAM processor target (pt) device # This is specified in seconds. The default is 60 seconds. options SCSI_PT_DEFAULT_TIMEOUT=60 # Optional enable of doing SES passthrough on other devices (e.g., disks) # # Normally disabled because a lot of newer SCSI disks report themselves # as having SES capabilities, but this can then clot up attempts to build # a topology with the SES device that's on the box these drives are in.... options SES_ENABLE_PASSTHROUGH ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device pty #BSD-style compatibility pseudo ttys device nmdm #back-to-back tty devices device md #Memory/malloc disk device snp #Snoop device - to look at pty/vty/etc.. device ccd #Concatenated disk driver device firmware #firmware(9) support # Kernel side iconv library options LIBICONV # Size of the kernel message buffer. Should be N * pagesize. options MSGBUF_SIZE=40960 ##################################################################### # HARDWARE DEVICE CONFIGURATION # For ISA the required hints are listed. # EISA, MCA, PCI, CardBus, SD/MMC and pccard are self identifying buses, so # no hints are needed. # # Mandatory devices: # # These options are valid for other keyboard drivers as well. options KBD_DISABLE_KEYMAP_LOAD # refuse to load a keymap options KBD_INSTALL_CDEV # install a CDEV entry in /dev options FB_DEBUG # Frame buffer debugging device splash # Splash screen and screen saver support # Various screen savers. device blank_saver device daemon_saver device dragon_saver device fade_saver device fire_saver device green_saver device logo_saver device rain_saver device snake_saver device star_saver device warp_saver # The syscons console driver (SCO color console compatible). device sc hint.sc.0.at="isa" options MAXCONS=16 # number of virtual consoles options SC_ALT_MOUSE_IMAGE # simplified mouse cursor in text mode options SC_DFLT_FONT # compile font in makeoptions SC_DFLT_FONT=cp850 options SC_DISABLE_KDBKEY # disable `debug' key options SC_DISABLE_REBOOT # disable reboot key sequence options SC_HISTORY_SIZE=200 # number of history buffer lines options SC_MOUSE_CHAR=0x3 # char code for text mode mouse cursor options SC_PIXEL_MODE # add support for the raster text mode # The following options will let you change the default colors of syscons. options SC_NORM_ATTR=(FG_GREEN|BG_BLACK) options SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN) options SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK) options SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED) # The following options will let you change the default behavior of # cut-n-paste feature options SC_CUT_SPACES2TABS # convert leading spaces into tabs options SC_CUT_SEPCHARS=\"x09\" # set of characters that delimit words # (default is single space - \"x20\") # If you have a two button mouse, you may want to add the following option # to use the right button of the mouse to paste text. options SC_TWOBUTTON_MOUSE # You can selectively disable features in syscons. options SC_NO_CUTPASTE options SC_NO_FONT_LOADING options SC_NO_HISTORY options SC_NO_MODE_CHANGE options SC_NO_SYSMOUSE options SC_NO_SUSPEND_VTYSWITCH # `flags' for sc # 0x80 Put the video card in the VESA 800x600 dots, 16 color mode # 0x100 Probe for a keyboard device periodically if one is not present # Enable experimental features of the syscons terminal emulator (teken). options TEKEN_CONS25 # cons25-style terminal emulation options TEKEN_UTF8 # UTF-8 output handling # The vt video console driver. device vt options VT_ALT_TO_ESC_HACK=1 # Prepend ESC sequence to ALT keys options VT_MAXWINDOWS=16 # Number of virtual consoles options VT_TWOBUTTON_MOUSE # Use right mouse button to paste # The following options set the default framebuffer size. options VT_FB_DEFAULT_HEIGHT=480 options VT_FB_DEFAULT_WIDTH=640 # The following options will let you change the default vt terminal colors. options TERMINAL_NORM_ATTR=(FG_GREEN|BG_BLACK) options TERMINAL_KERN_ATTR=(FG_LIGHTRED|BG_BLACK) # # Optional devices: # # # SCSI host adapters: # # adv: All Narrow SCSI bus AdvanSys controllers. # adw: Second Generation AdvanSys controllers including the ADV940UW. # aha: Adaptec 154x/1535/1640 # ahb: Adaptec 174x EISA controllers # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/ # 19160x/29160x, aic7770/aic78xx # ahd: Adaptec 29320/39320 Controllers. # aic: Adaptec 6260/6360, APA-1460 (PC Card), NEC PC9801-100 (C-BUS) # bt: Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x, # BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F # esp: Emulex ESP, NCR 53C9x and QLogic FAS families based controllers # including the AMD Am53C974 (found on devices such as the Tekram # DC-390(T)) and the Sun ESP and FAS families of controllers # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters, # ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2, # ISP 12160 Ultra3 SCSI, # Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters. # Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters. # Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters. # ispfw: Firmware module for Qlogic host adapters # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4 # or FC9x9 Fibre Channel host adapters. # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters. # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors: # 53C810, 53C810A, 53C815, 53C825, 53C825A, 53C860, 53C875, # 53C876, 53C885, 53C895, 53C895A, 53C896, 53C897, 53C1510D, # 53C1010-33, 53C1010-66. # trm: Tekram DC395U/UW/F DC315U adapters. # wds: WD7000 # # Note that the order is important in order for Buslogic ISA/EISA cards to be # probed correctly. # device bt hint.bt.0.at="isa" hint.bt.0.port="0x330" device adv hint.adv.0.at="isa" device adw device aha hint.aha.0.at="isa" device aic hint.aic.0.at="isa" device ahb device ahc device ahd device esp device iscsi_initiator device isp hint.isp.0.disable="1" hint.isp.0.role="3" hint.isp.0.prefer_iomap="1" hint.isp.0.prefer_memmap="1" hint.isp.0.fwload_disable="1" hint.isp.0.ignore_nvram="1" hint.isp.0.fullduplex="1" hint.isp.0.topology="lport" hint.isp.0.topology="nport" hint.isp.0.topology="lport-only" hint.isp.0.topology="nport-only" # we can't get u_int64_t types, nor can we get strings if it's got # a leading 0x, hence this silly dodge. hint.isp.0.portwnn="w50000000aaaa0000" hint.isp.0.nodewnn="w50000000aaaa0001" device ispfw device mpt device ncr device sym device trm device wds hint.wds.0.at="isa" hint.wds.0.port="0x350" hint.wds.0.irq="11" hint.wds.0.drq="6" # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # Dump the contents of the ahc controller configuration PROM. options AHC_DUMP_EEPROM # Bitmap of units to enable targetmode operations. options AHC_TMODE_ENABLE # Compile in Aic7xxx Debugging code. options AHC_DEBUG # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h options AHC_DEBUG_OPTS # Print register bitfields in debug output. Adds ~128k to driver # See ahc(4). options AHC_REG_PRETTY_PRINT # Compile in aic79xx debugging code. options AHD_DEBUG # Aic79xx driver debugging options. Adds ~215k to driver. See ahd(4). options AHD_DEBUG_OPTS=0xFFFFFFFF # Print human-readable register definitions when debugging options AHD_REG_PRETTY_PRINT # Bitmap of units to enable targetmode operations. options AHD_TMODE_ENABLE # The adw driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. options ADW_ALLOW_MEMIO # Options used in dev/iscsi (Software iSCSI stack) # options ISCSI_INITIATOR_DEBUG=9 # Options used in dev/isp/ (Qlogic SCSI/FC driver). # # ISP_TARGET_MODE - enable target mode operation # options ISP_TARGET_MODE=1 # # ISP_DEFAULT_ROLES - default role # none=0 # target=1 # initiator=2 # both=3 (not supported currently) # # ISP_INTERNAL_TARGET (trivial internal disk target, for testing) # options ISP_DEFAULT_ROLES=0 # Options used in dev/sym/ (Symbios SCSI driver). #options SYM_SETUP_LP_PROBE_MAP #-Low Priority Probe Map (bits) # Allows the ncr to take precedence # 1 (1<<0) -> 810a, 860 # 2 (1<<1) -> 825a, 875, 885, 895 # 4 (1<<2) -> 895a, 896, 1510d #options SYM_SETUP_SCSI_DIFF #-HVD support for 825a, 875, 885 # disabled:0 (default), enabled:1 #options SYM_SETUP_PCI_PARITY #-PCI parity checking # disabled:0, enabled:1 (default) #options SYM_SETUP_MAX_LUN #-Number of LUNs supported # default:8, range:[1..64] # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/). # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O. # The DPT controllers are commonly re-licensed under other brand-names - # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and # Compaq are actually DPT controllers. # # See src/sys/dev/dpt for debugging and other subtle options. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. The tools in # /usr/sbin/dpt_* assume these to be enabled. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_RESET_HBA Make "reset" actually reset the controller # instead of fudging it. Only enable this if you # are 100% certain you need it. device dpt # DPT options #!CAM# options DPT_MEASURE_PERFORMANCE options DPT_RESET_HBA # # Compaq "CISS" RAID controllers (SmartRAID 5* series) # These controllers have a SCSI-like interface, and require the # CAM infrastructure. # device ciss # # Intel Integrated RAID controllers. # This driver was developed and is maintained by Intel. Contacts # at Intel for this driver are # "Kannanthanam, Boji T" and # "Leubner, Achim" . # device iir # # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later # firmware. These controllers have a SCSI-like interface, and require # the CAM infrastructure. # device mly # # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers. Only # one entry is needed; the code will find and configure all supported # controllers. # device ida # Compaq Smart RAID device mlx # Mylex DAC960 device amr # AMI MegaRAID device amrp # SCSI Passthrough interface (optional, CAM req.) device mfi # LSI MegaRAID SAS device mfip # LSI MegaRAID SAS passthrough, requires CAM options MFI_DEBUG device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s # # 3ware ATA RAID # device twe # 3ware ATA RAID # # Serial ATA host controllers: # # ahci: Advanced Host Controller Interface (AHCI) compatible # mvs: Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers # siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers # # These drivers are part of cam(4) subsystem. They supersede less featured # ata(4) subsystem drivers, supporting same hardware. device ahci device mvs device siis # # The 'ATA' driver supports all legacy ATA/ATAPI controllers, including # PC Card devices. You only need one "device ata" for it to find all # PCI and PC Card ATA/ATAPI devices on modern machines. # Alternatively, individual bus and chipset drivers may be chosen by using # the 'atacore' driver then selecting the drivers on a per vendor basis. # For example to build a system which only supports a VIA chipset, # omit 'ata' and include the 'atacore', 'atapci' and 'atavia' drivers. device ata # Modular ATA #device atacore # Core ATA functionality #device atacard # CARDBUS support #device atabus # PC98 cbus support #device ataisa # ISA bus support #device atapci # PCI bus support; only generic chipset support # PCI ATA chipsets #device ataacard # ACARD #device ataacerlabs # Acer Labs Inc. (ALI) #device ataamd # American Micro Devices (AMD) #device ataati # ATI #device atacenatek # Cenatek #device atacypress # Cypress #device atacyrix # Cyrix #device atahighpoint # HighPoint #device ataintel # Intel #device ataite # Integrated Technology Inc. (ITE) #device atajmicron # JMicron #device atamarvell # Marvell #device atamicron # Micron #device atanational # National #device atanetcell # NetCell #device atanvidia # nVidia #device atapromise # Promise #device ataserverworks # ServerWorks #device atasiliconimage # Silicon Image Inc. (SiI) (formerly CMD) #device atasis # Silicon Integrated Systems Corp.(SiS) #device atavia # VIA Technologies Inc. # # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add: hint.ata.0.at="isa" hint.ata.0.port="0x1f0" hint.ata.0.irq="14" hint.ata.1.at="isa" hint.ata.1.port="0x170" hint.ata.1.irq="15" # # The following options are valid on the ATA driver: # # ATA_STATIC_ID: controller numbering is static ie depends on location # else the device numbers are dynamically allocated. # ATA_REQUEST_TIMEOUT: the number of seconds to wait for an ATA request # before timing out. options ATA_STATIC_ID #options ATA_REQUEST_TIMEOUT=10 # # Standard floppy disk controllers and floppy tapes, supports # the Y-E DATA External FDD (PC Card) # device fdc hint.fdc.0.at="isa" hint.fdc.0.port="0x3F0" hint.fdc.0.irq="6" hint.fdc.0.drq="2" # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # # Activate this line if you happen to have an Insight floppy tape. # Probing them proved to be dangerous for people with floppy disks only, # so it's "hidden" behind a flag: #hint.fdc.0.flags="1" # Specify floppy devices hint.fd.0.at="fdc0" hint.fd.0.drive="0" hint.fd.1.at="fdc0" hint.fd.1.drive="1" # # uart: newbusified driver for serial interfaces. It consolidates the sio(4), # sab(4) and zs(4) drivers. # device uart # Options for uart(4) options UART_PPS_ON_CTS # Do time pulse capturing using CTS # instead of DCD. options UART_POLL_FREQ # Set polling rate, used when hw has # no interrupt support (50 Hz default). # The following hint should only be used for pure ISA devices. It is not # needed otherwise. Use of hints is strongly discouraged. hint.uart.0.at="isa" # The following 3 hints are used when the UART is a system device (i.e., a # console or debug port), but only on platforms that don't have any other # means to pass the information to the kernel. The unit number of the hint # is only used to bundle the hints together. There is no relation to the # unit number of the probed UART. hint.uart.0.port="0x3f8" hint.uart.0.flags="0x10" hint.uart.0.baud="115200" # `flags' for serial drivers that support consoles like sio(4) and uart(4): # 0x10 enable console support for this unit. Other console flags # (if applicable) are ignored unless this is set. Enabling # console support does not make the unit the preferred console. # Boot with -h or set boot_serial=YES in the loader. For sio(4) # specifically, the 0x20 flag can also be set (see above). # Currently, at most one unit can have console support; the # first one (in config file order) with this flag set is # preferred. Setting this flag for sio0 gives the old behavior. # 0x80 use this port for serial line gdb support in ddb. Also known # as debug port. # # Options for serial drivers that support consoles: options BREAK_TO_DEBUGGER # A BREAK/DBG on the console goes to # ddb, if available. # Solaris implements a new BREAK which is initiated by a character # sequence CR ~ ^b which is similar to a familiar pattern used on # Sun servers by the Remote Console. There are FreeBSD extensions: # CR ~ ^p requests force panic and CR ~ ^r requests a clean reboot. options ALT_BREAK_TO_DEBUGGER # Serial Communications Controller # Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel # communications controllers. device scc # PCI Universal Communications driver # Supports various multi port PCI I/O cards. device puc # # Network interfaces: # # MII bus support is required for many PCI Ethernet NICs, # namely those which use MII-compliant transceivers or implement # transceiver control interfaces that operate like an MII. Adding # "device miibus" to the kernel config pulls in support for the generic # miibus API, the common support for for bit-bang'ing the MII and all # of the PHY drivers, including a generic one for PHYs that aren't # specifically handled by an individual driver. Support for specific # PHYs may be built by adding "device mii", "device mii_bitbang" if # needed by the NIC driver and then adding the appropriate PHY driver. device mii # Minimal MII support device mii_bitbang # Common module for bit-bang'ing the MII device miibus # MII support w/ bit-bang'ing and all PHYs device acphy # Altima Communications AC101 device amphy # AMD AM79c873 / Davicom DM910{1,2} device atphy # Attansic/Atheros F1 device axphy # Asix Semiconductor AX88x9x device bmtphy # Broadcom BCM5201/BCM5202 and 3Com 3c905C device brgphy # Broadcom BCM54xx/57xx 1000baseTX device ciphy # Cicada/Vitesse CS/VSC8xxx device e1000phy # Marvell 88E1000 1000/100/10-BT device gentbi # Generic 10-bit 1000BASE-{LX,SX} fiber ifaces device icsphy # ICS ICS1889-1893 device ip1000phy # IC Plus IP1000A/IP1001 device jmphy # JMicron JMP211/JMP202 device lxtphy # Level One LXT-970 device mlphy # Micro Linear 6692 device nsgphy # NatSemi DP8361/DP83865/DP83891 device nsphy # NatSemi DP83840A device nsphyter # NatSemi DP83843/DP83815 device pnaphy # HomePNA device qsphy # Quality Semiconductor QS6612 device rdcphy # RDC Semiconductor R6040 device rgephy # RealTek 8169S/8110S/8211B/8211C device rlphy # RealTek 8139 device rlswitch # RealTek 8305 device smcphy # SMSC LAN91C111 device tdkphy # TDK 89Q2120 device tlphy # Texas Instruments ThunderLAN device truephy # LSI TruePHY device xmphy # XaQti XMAC II # an: Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA, # PCI and ISA varieties. # ae: Support for gigabit ethernet adapters based on the Attansic/Atheros # L2 PCI-Express FastEthernet controllers. # age: Support for gigabit ethernet adapters based on the Attansic/Atheros # L1 PCI express gigabit ethernet controllers. # alc: Support for Atheros AR8131/AR8132 PCIe ethernet controllers. # ale: Support for Atheros AR8121/AR8113/AR8114 PCIe ethernet controllers. # ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan) # bce: Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet # adapters. # bfe: Broadcom BCM4401 Ethernet adapter. # bge: Support for gigabit ethernet adapters based on the Broadcom # BCM570x family of controllers, including the 3Com 3c996-T, # the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and # the embedded gigE NICs on Dell PowerEdge 2550 servers. # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # bwi: Broadcom BCM430* and BCM431* family of wireless adapters. # bwn: Broadcom BCM43xx family of wireless adapters. # cas: Sun Cassini/Cassini+ and National Semiconductor DP83065 Saturn # cm: Arcnet SMC COM90c26 / SMC COM90c56 # (and SMC COM90c66 in '56 compatibility mode) adapters. # cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters. # cxgbe:Chelsio T4 and T5 based 1GbE/10GbE/40GbE PCIe Ethernet adapters. # dc: Support for PCI fast ethernet adapters based on the DEC/Intel 21143 # and various workalikes including: # the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics # AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On # 82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II # and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver # replaces the old al, ax, dm, pn and mx drivers. List of brands: # Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110, # SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX, # LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204, # KNE110TX. # de: Digital Equipment DC21040 # em: Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters. # igb: Intel Pro/1000 PCI Express Gigabit Ethernet: 82575 and later adapters. # ep: 3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589 # and PC Card devices using these chipsets. # ex: Intel EtherExpress Pro/10 and other i82595-based adapters, # Olicom Ethernet PC Card devices. # fe: Fujitsu MB86960A/MB86965A Ethernet # fea: DEC DEFEA EISA FDDI adapter # fpa: Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed. # fxp: Intel EtherExpress Pro/100B # (hint of prefer_iomap can be done to prefer I/O instead of Mem mapping) # gem: Apple GMAC/Sun ERI/Sun GEM # hme: Sun HME (Happy Meal Ethernet) # jme: JMicron JMC260 Fast Ethernet/JMC250 Gigabit Ethernet based adapters. # le: AMD Am7900 LANCE and Am79C9xx PCnet # lge: Support for PCI gigabit ethernet adapters based on the Level 1 # LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX, # SMC TigerCard 1000 (SMC9462SX), and some Addtron cards. # malo: Marvell Libertas wireless NICs. # mwl: Marvell 88W8363 802.11n wireless NICs. # Requires the mwl firmware module # mwlfw: Marvell 88W8363 firmware # msk: Support for gigabit ethernet adapters based on the Marvell/SysKonnect # Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061, # 88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053, # 88E8055, 88E8056 and D-Link 560T/550SX. # lmc: Support for the LMC/SBE wide-area network interface cards. # my: Myson Fast Ethernet (MTD80X, MTD89X) # nge: Support for PCI gigabit ethernet adapters based on the National # Semiconductor DP83820 and DP83821 chipset. This includes the # SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet # GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom # EP-320G-TX and the Netgear GA622T. # oce: Emulex 10 Gbit adapters (OneConnect Ethernet) # pcn: Support for PCI fast ethernet adapters based on the AMD Am79c97x # PCnet-FAST, PCnet-FAST+, PCnet-FAST III, PCnet-PRO and PCnet-Home # chipsets. These can also be handled by the le(4) driver if the # pcn(4) driver is left out of the kernel. The le(4) driver does not # support the additional features like the MII bus and burst mode of # the PCnet-FAST and greater chipsets though. # ral: Ralink Technology IEEE 802.11 wireless adapter # re: RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter # rl: Support for PCI fast ethernet adapters based on the RealTek 8129/8139 # chipset. Note that the RealTek driver defaults to using programmed # I/O to do register accesses because memory mapped mode seems to cause # severe lockups on SMP hardware. This driver also supports the # Accton EN1207D `Cheetah' adapter, which uses a chip called # the MPX 5030/5038, which is either a RealTek in disguise or a # RealTek workalike. Note that the D-Link DFE-530TX+ uses the RealTek # chipset and is supported by this driver, not the 'vr' driver. # sf: Support for Adaptec Duralink PCI fast ethernet adapters based on the # Adaptec AIC-6915 "starfire" controller. # This includes dual and quad port cards, as well as one 100baseFX card. # Most of these are 64-bit PCI devices, except for one single port # card which is 32-bit. # sge: Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter # sis: Support for NICs based on the Silicon Integrated Systems SiS 900, # SiS 7016 and NS DP83815 PCI fast ethernet controller chips. # sk: Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs. # This includes the SK-9841 and SK-9842 single port cards (single mode # and multimode fiber) and the SK-9843 and SK-9844 dual port cards # (also single mode and multimode). # The driver will autodetect the number of ports on the card and # attach each one as a separate network interface. # sn: Support for ISA and PC Card Ethernet devices using the # SMC91C90/92/94/95 chips. # ste: Sundance Technologies ST201 PCI fast ethernet controller, includes # the D-Link DFE-550TX. # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack # TC9021 family of controllers, including the Sundance ST2021/ST2023, # the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101. # ti: Support for PCI gigabit ethernet NICs based on the Alteon Networks # Tigon 1 and Tigon 2 chipsets. This includes the Alteon AceNIC, the # 3Com 3c985, the Netgear GA620 and various others. Note that you will # probably want to bump up kern.ipc.nmbclusters a lot to use this driver. # tl: Support for the Texas Instruments TNETE100 series 'ThunderLAN' # cards and integrated ethernet controllers. This includes several # Compaq Netelligent 10/100 cards and the built-in ethernet controllers # in several Compaq Prosignia, Proliant and Deskpro systems. It also # supports several Olicom 10Mbps and 10/100 boards. # tx: SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series) # txp: Support for 3Com 3cR990 cards with the "Typhoon" chipset # vr: Support for various fast ethernet adapters based on the VIA # Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips, # including the D-Link DFE520TX and D-Link DFE530TX (see 'rl' for # DFE530TX+), the Hawking Technologies PN102TX, and the AOpen/Acer ALN-320. # vte: DM&P Vortex86 RDC R6040 Fast Ethernet # vx: 3Com 3C590 and 3C595 # wb: Support for fast ethernet adapters based on the Winbond W89C840F chip. # Note: this is not the same as the Winbond W89C940F, which is a # NE2000 clone. # wi: Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both # the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA # bridge with a PCMCIA adapter plugged into it. # xe: Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller, # Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card, # Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56 # xl: Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast) # Etherlink XL cards and integrated controllers. This includes the # integrated 3c905B-TX chips in certain Dell Optiplex and Dell # Precision desktop machines and the integrated 3c905-TX chips # in Dell Latitude laptop docking stations. # Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX # Order for ISA/EISA devices is important here device cm hint.cm.0.at="isa" hint.cm.0.port="0x2e0" hint.cm.0.irq="9" hint.cm.0.maddr="0xdc000" device ep device ex device fe hint.fe.0.at="isa" hint.fe.0.port="0x300" device fea device sn hint.sn.0.at="isa" hint.sn.0.port="0x300" hint.sn.0.irq="10" device an device wi device xe # PCI Ethernet NICs that use the common MII bus controller code. device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4 and T5 1GbE/10GbE/40GbE device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) hint.fxp.0.prefer_iomap="0" device gem # Apple GMAC/Sun ERI/Sun GEM device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device my # Myson Fast Ethernet (MTD80X, MTD89X) device nge # NatSemi DP83820 gigabit Ethernet device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device pcn # AMD Am79C97x PCI 10/100 NICs device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # PCI Ethernet NICs. device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel Pro/1000 Gigabit Ethernet device igb # Intel Pro/1000 PCIE Gigabit Ethernet device ixgb # Intel Pro/10Gbe PCI-X Ethernet device ix # Intel Pro/10Gbe PCIE Ethernet device ixv # Intel Pro/10Gbe PCIE Ethernet VF device le # AMD Am7900 LANCE and Am79C9xx PCnet device mxge # Myricom Myri-10G 10GbE NIC device nxge # Neterion Xframe 10GbE Server/Storage Adapter device oce # Emulex 10 GbE (OneConnect Ethernet) device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') device vxge # Exar/Neterion XFrame 3100 10GbE # PCI FDDI NICs. device fpa # PCI WAN adapters. device lmc # PCI IEEE 802.11 Wireless NICs device ath # Atheros pci/cardbus NIC's device ath_hal # pci/cardbus chip support #device ath_ar5210 # AR5210 chips #device ath_ar5211 # AR5211 chips #device ath_ar5212 # AR5212 chips #device ath_rf2413 #device ath_rf2417 #device ath_rf2425 #device ath_rf5111 #device ath_rf5112 #device ath_rf5413 #device ath_ar5416 # AR5416 chips options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors # All of the AR5212 parts have a problem when paired with the AR71xx # CPUS. These parts have a bug that triggers a fatal bus error on the AR71xx # only. Details of the exact nature of the bug are sketchy, but some can be # found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and # 6. This option enables this workaround. There is a performance penalty # for this work around, but without it things don't work at all. The DMA # from the card usually bursts 128 bytes, but on the affected CPUs, only # 4 are safe. options AH_RXCFG_SDMAMW_4BYTES #device ath_ar9160 # AR9160 chips #device ath_ar9280 # AR9280 chips #device ath_ar9285 # AR9285 chips device ath_rate_sample # SampleRate tx rate control for ath device bwi # Broadcom BCM430* BCM431* device bwn # Broadcom BCM43xx device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device mwlfw device ral # Ralink Technology RT2500 wireless NICs. # Use sf_buf(9) interface for jumbo buffers on ti(4) controllers. #options TI_SF_BUF_JUMBO # Turn on the header splitting option for the ti(4) driver firmware. This # only works for Tigon II chips, and has no effect for Tigon I chips. # This option requires the TI_SF_BUF_JUMBO option above. #options TI_JUMBO_HDRSPLIT # These two options allow manipulating the mbuf cluster size and mbuf size, # respectively. Be very careful with NIC driver modules when changing # these from their default values, because that can potentially cause a # mismatch between the mbuf size assumed by the kernel and the mbuf size # assumed by a module. The only driver that currently has the ability to # detect a mismatch is ti(4). options MCLSHIFT=12 # mbuf cluster shift in bits, 12 == 4KB options MSIZE=512 # mbuf size in bytes # # ATM related options (Cranor version) # (note: this driver cannot be used with the HARP ATM stack) # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # The `hatm' device provides support for Fore/Marconi HE155 and HE622 # ATM PCI cards. # # The `fatm' device provides support for Fore PCA200E ATM PCI cards. # # The `patm' device provides support for IDT77252 based cards like # ProSum's ProATM-155 and ProATM-25 and IDT's evaluation boards. # # atm device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # utopia provides the access to the ATM PHY chips and is required for en, # hatm and fatm. # # the current driver supports only PVC operations (no atm-arp, no multicast). # for more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html # device atm device en device fatm #Fore PCA200E device hatm #Fore/Marconi HE155/622 device patm #IDT77252 cards (ProATM and IDT) device utopia #ATM PHY driver options NATM #native ATM options LIBMBPOOL #needed by patm, iatm # # Sound drivers # # sound: The generic sound driver. # device sound # # snd_*: Device-specific drivers. # # The flags of the device tell the device a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if don't know what to put in (and you don't, # since this is unsupported at the moment...). # # snd_ad1816: Analog Devices AD1816 ISA PnP/non-PnP. # snd_als4000: Avance Logic ALS4000 PCI. # snd_atiixp: ATI IXP 200/300/400 PCI. # snd_audiocs: Crystal Semiconductor CS4231 SBus/EBus. Only # for sparc64. # snd_cmi: CMedia CMI8338/CMI8738 PCI. # snd_cs4281: Crystal Semiconductor CS4281 PCI. # snd_csa: Crystal Semiconductor CS461x/428x PCI. (except # 4281) # snd_ds1: Yamaha DS-1 PCI. # snd_emu10k1: Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI. # snd_emu10kx: Creative SoundBlaster Live! and Audigy # snd_envy24: VIA Envy24 and compatible, needs snd_spicds. # snd_envy24ht: VIA Envy24HT and compatible, needs snd_spicds. # snd_es137x: Ensoniq AudioPCI ES137x PCI. # snd_ess: Ensoniq ESS ISA PnP/non-PnP, to be used in # conjunction with snd_sbc. # snd_fm801: Forte Media FM801 PCI. # snd_gusc: Gravis UltraSound ISA PnP/non-PnP. # snd_hda: Intel High Definition Audio (Controller) and # compatible. # snd_hdspe: RME HDSPe AIO and RayDAT. # snd_ich: Intel ICH AC'97 and some more audio controllers # embedded in a chipset, for example nVidia # nForce controllers. # snd_maestro: ESS Technology Maestro-1/2x PCI. # snd_maestro3: ESS Technology Maestro-3/Allegro PCI. # snd_mss: Microsoft Sound System ISA PnP/non-PnP. # snd_neomagic: Neomagic 256 AV/ZX PCI. # snd_sb16: Creative SoundBlaster16, to be used in # conjunction with snd_sbc. # snd_sb8: Creative SoundBlaster (pre-16), to be used in # conjunction with snd_sbc. # snd_sbc: Creative SoundBlaster ISA PnP/non-PnP. # Supports ESS and Avance ISA chips as well. # snd_solo: ESS Solo-1x PCI. # snd_spicds: SPI codec driver, needed by Envy24/Envy24HT drivers. # snd_t4dwave: Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs # M5451 PCI. # snd_uaudio: USB audio. # snd_via8233: VIA VT8233x PCI. # snd_via82c686: VIA VT82C686A PCI. # snd_vibes: S3 Sonicvibes PCI. device snd_ad1816 device snd_als4000 device snd_atiixp #device snd_audiocs device snd_cmi device snd_cs4281 device snd_csa device snd_ds1 device snd_emu10k1 device snd_emu10kx device snd_envy24 device snd_envy24ht device snd_es137x device snd_ess device snd_fm801 device snd_gusc device snd_hda device snd_hdspe device snd_ich device snd_maestro device snd_maestro3 device snd_mss device snd_neomagic device snd_sb16 device snd_sb8 device snd_sbc device snd_solo device snd_spicds device snd_t4dwave device snd_uaudio device snd_via8233 device snd_via82c686 device snd_vibes # For non-PnP sound cards: hint.pcm.0.at="isa" hint.pcm.0.irq="10" hint.pcm.0.drq="1" hint.pcm.0.flags="0x0" hint.sbc.0.at="isa" hint.sbc.0.port="0x220" hint.sbc.0.irq="5" hint.sbc.0.drq="1" hint.sbc.0.flags="0x15" hint.gusc.0.at="isa" hint.gusc.0.port="0x220" hint.gusc.0.irq="5" hint.gusc.0.drq="1" hint.gusc.0.flags="0x13" # # Following options are intended for debugging/testing purposes: # # SND_DEBUG Enable extra debugging code that includes # sanity checking and possible increase of # verbosity. # # SND_DIAGNOSTIC Similar in a spirit of INVARIANTS/DIAGNOSTIC, # zero tolerance against inconsistencies. # # SND_FEEDER_MULTIFORMAT By default, only 16/32 bit feeders are compiled # in. This options enable most feeder converters # except for 8bit. WARNING: May bloat the kernel. # # SND_FEEDER_FULL_MULTIFORMAT Ditto, but includes 8bit feeders as well. # # SND_FEEDER_RATE_HP (feeder_rate) High precision 64bit arithmetic # as much as possible (the default trying to # avoid it). Possible slowdown. # # SND_PCM_64 (Only applicable for i386/32bit arch) # Process 32bit samples through 64bit # integer/arithmetic. Slight increase of dynamic # range at a cost of possible slowdown. # # SND_OLDSTEREO Only 2 channels are allowed, effectively # disabling multichannel processing. # options SND_DEBUG options SND_DIAGNOSTIC options SND_FEEDER_MULTIFORMAT options SND_FEEDER_FULL_MULTIFORMAT options SND_FEEDER_RATE_HP options SND_PCM_64 options SND_OLDSTEREO # # Miscellaneous hardware: # # scd: Sony CD-ROM using proprietary (non-ATAPI) interface # mcd: Mitsumi CD-ROM using proprietary (non-ATAPI) interface # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board # joy: joystick (including IO DATA PCJOY PC Card joystick) # cmx: OmniKey CardMan 4040 pccard smartcard reader # Mitsumi CD-ROM device mcd hint.mcd.0.at="isa" hint.mcd.0.port="0x300" # for the Sony CDU31/33A CDROM device scd hint.scd.0.at="isa" hint.scd.0.port="0x230" device joy # PnP aware, hints for non-PnP only hint.joy.0.at="isa" hint.joy.0.port="0x201" device cmx # # The 'bktr' device is a PCI video capture device using the Brooktree # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a # TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator, # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo. # # options OVERRIDE_CARD=xxx # options OVERRIDE_TUNER=xxx # options OVERRIDE_MSP=1 # options OVERRIDE_DBX=1 # These options can be used to override the auto detection # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h # Using sysctl(8) run-time overrides on a per-card basis can be made # # options BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL # or # options BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC # Specifies the default video capture mode. # This is required for Dual Crystal (28&35MHz) boards where PAL is used # to prevent hangs during initialization, e.g. VideoLogic Captivator PCI. # # options BKTR_USE_PLL # This is required for PAL or SECAM boards with a 28MHz crystal and no 35MHz # crystal, e.g. some new Bt878 cards. # # options BKTR_GPIO_ACCESS # This enables IOCTLs which give user level access to the GPIO port. # # options BKTR_NO_MSP_RESET # Prevents the MSP34xx reset. Good if you initialize the MSP in another OS first # # options BKTR_430_FX_MODE # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode. # # options BKTR_SIS_VIA_MODE # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is # needed for some old SiS and VIA chipset motherboards. # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset # motherboards and motherboards with bad or incomplete PCI 2.1 support. # As a rough guess, old = before 1998 # # options BKTR_NEW_MSP34XX_DRIVER # Use new, more complete initialization scheme for the msp34* soundchip. # Should fix stereo autodetection if the old driver does only output # mono sound. # # options BKTR_USE_FREEBSD_SMBUS # Compile with FreeBSD SMBus implementation # # Brooktree driver has been ported to the new I2C framework. Thus, # you'll need to have the following 3 lines in the kernel config. # device smbus # device iicbus # device iicbb # device iicsmb # The iic and smb devices are only needed if you want to control other # I2C slaves connected to the external connector of some cards. # device bktr # # PC Card/PCMCIA and Cardbus # # cbb: pci/cardbus bridge implementing YENTA interface # pccard: pccard slots # cardbus: cardbus slots device cbb device pccard device cardbus # # MMC/SD # # mmc MMC/SD bus # mmcsd MMC/SD memory card # sdhci Generic PCI SD Host Controller # device mmc device mmcsd device sdhci # # SMB bus # # System Management Bus support is provided by the 'smbus' device. # Access to the SMBus device is via the 'smb' device (/dev/smb*), # which is a child of the 'smbus' device. # # Supported devices: # smb standard I/O through /dev/smb* # # Supported SMB interfaces: # iicsmb I2C to SMB bridge with any iicbus interface # bktr brooktree848 I2C hardware interface # intpm Intel PIIX4 (82371AB, 82443MX) Power Management Unit # alpm Acer Aladdin-IV/V/Pro2 Power Management Unit # ichsmb Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA) # viapm VIA VT82C586B/596B/686A and VT8233 Power Management Unit # amdpm AMD 756 Power Management Unit # amdsmb AMD 8111 SMBus 2.0 Controller # nfpm NVIDIA nForce Power Management Unit # nfsmb NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller # ismt Intel SMBus 2.0 controller chips (on Atom S1200, C2000) # device smbus # Bus support, required for smb below. device intpm device alpm device ichsmb device viapm device amdpm device amdsmb device nfpm device nfsmb device ismt device smb # # I2C Bus # # Philips i2c bus support is provided by the `iicbus' device. # # Supported devices: # ic i2c network interface # iic i2c standard io # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands. # iicoc simple polling driver for OpenCores I2C controller # # Supported interfaces: # bktr brooktree848 I2C software interface # # Other: # iicbb generic I2C bit-banging code (needed by lpbb, bktr) # device iicbus # Bus support, required for ic/iic/iicsmb below. device iicbb device ic device iic device iicsmb # smb over i2c bridge device iicoc # OpenCores I2C controller support # I2C peripheral devices # # ds133x Dallas Semiconductor DS1337, DS1338 and DS1339 RTC # ds1374 Dallas Semiconductor DS1374 RTC # ds1672 Dallas Semiconductor DS1672 RTC # s35390a Seiko Instruments S-35390A RTC # device ds133x device ds1374 device ds1672 device s35390a # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port, devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'da'), best # performance is achieved with ports in EPP 1.9 mode. # lpt Parallel Printer # plip Parallel network interface # ppi General-purpose I/O ("Geek Port") + IEEE1284 I/O # pps Pulse per second Timing Interface # lpbb Philips official parallel port I2C bit-banging interface # pcfclock Parallel port clock driver. # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # options PPC_PROBE_CHIPSET # Enable chipset specific detection # (see flags in ppc(4)) options DEBUG_1284 # IEEE1284 signaling protocol debug options PERIPH_1284 # Makes your computer act as an IEEE1284 # compliant peripheral options DONTPROBE_1284 # Avoid boot detection of PnP parallel devices options VP0_DEBUG # ZIP/ZIP+ debug options LPT_DEBUG # Printer driver debug options PPC_DEBUG # Parallel chipset level debug options PLIP_DEBUG # Parallel network IP interface debug options PCFCLOCK_VERBOSE # Verbose pcfclock driver options PCFCLOCK_MAX_RETRIES=5 # Maximum read tries (default 10) device ppc hint.ppc.0.at="isa" hint.ppc.0.irq="7" device ppbus device vpo device lpt device plip device ppi device pps device lpbb device pcfclock # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname # Requires NFSCL and NFS_ROOT options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options BOOTP_NFSV3 # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP options BOOTP_BLOCKSIZE=8192 # Override NFS block size # # Add software watchdog routines. # options SW_WATCHDOG # # Add the software deadlock resolver thread. # options DEADLKRES # # Disable swapping of stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn # it back on at run-time. # # This is sometimes usable for systems which don't have any swap space # (see also sysctls "vm.defer_swapspace_pageouts" and # "vm.disable_swapspace_pageouts") # #options NO_SWAPPING # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers # for sendfile(2) that are used to map file VM pages, and normally # default to a quantity that is roughly 16*MAXUSERS+512. You would # typically want about 4 of these for each simultaneous file send. # options NSFBUFS=1024 # # Enable extra debugging code for locks. This stores the filename and # line of whatever acquired the lock in the lock itself, and changes a # number of function calls to pass around the relevant data. This is # not at all useful unless you are debugging lock code. Note that # modules should be recompiled as this option modifies KBI. # options DEBUG_LOCKS ##################################################################### # USB support # UHCI controller device uhci # OHCI controller device ohci # EHCI controller device ehci # XHCI controller device xhci # SL811 Controller #device slhci # General USB code (mandatory for USB) device usb # # USB Double Bulk Pipe devices device udbp # USB Fm Radio device ufm # USB LED device uled # Human Interface Device (anything with buttons and dials) device uhid # USB keyboard device ukbd # USB printer device ulpt # USB mass storage driver (Requires scbus and da) device umass # USB mass storage driver for device-side mode device usfs # USB support for Belkin F5U109 and Magic Control Technology serial adapters device umct # USB modem support device umodem # USB mouse device ums # USB touchpad(s) device atp device wsp # eGalax USB touch screen device uep # Diamond Rio 500 MP3 player device urio # # USB serial support device ucom # USB support for 3G modem cards by Option, Novatel, Huawei and Sierra device u3g # USB support for Technologies ARK3116 based serial adapters device uark # USB support for Belkin F5U103 and compatible serial adapters device ubsa # USB support for serial adapters based on the FT8U100AX and FT8U232AM device uftdi # USB support for some Windows CE based serial communication. device uipaq # USB support for Prolific PL-2303 serial adapters device uplcom # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters device uslcom # USB Visor and Palm devices device uvisor # USB serial support for DDI pocket's PHS device uvscom # # ADMtek USB ethernet. Supports the LinkSys USB100TX, # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus # eval board. device aue # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the # LinkSys USB200M and various other adapters. device axe # ASIX Electronics AX88178A/AX88179 USB 2.0/3.0 gigabit ethernet driver. device axge # # Devices which communicate using Ethernet over USB, particularly # Communication Device Class (CDC) Ethernet specification. Supports # Sharp Zaurus PDAs, some DOCSIS cable modems and so on. device cdce # # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate # and Netmate II, and the Belkin F5U111. device cue # # Kawasaki LSI ethernet. Supports the LinkSys USB10T, # Entrega USB-NET-E45, Peracom Ethernet Adapter, the # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T, # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB # and 2104USB, and the Corega USB-T. device kue # # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX # and the GREEN HOUSE GH-USB100B. device rue # # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC. device udav # # Moschip MCS7730/MCS7840 USB to fast ethernet. Supports the Sitecom LN030. device mos # # HSxPA devices from Option N.V device uhso # Realtek RTL8188SU/RTL8191SU/RTL8192SU wireless driver device rsu # # Ralink Technology RT2501USB/RT2601USB wireless driver device rum # Ralink Technology RT2700U/RT2800U/RT3000U wireless driver device run # # Atheros AR5523 wireless driver device uath # # Conexant/Intersil PrismGT wireless driver device upgt # # Ralink Technology RT2500USB wireless driver device ural # # RNDIS USB ethernet driver device urndis # Realtek RTL8187B/L wireless driver device urtw # # Realtek RTL8188CU/RTL8192CU wireless driver device urtwn # # ZyDas ZD1211/ZD1211B wireless driver device zyd # # Sierra USB wireless driver device usie # # debugging options for the USB subsystem # options USB_DEBUG options U3G_DEBUG # options for ukbd: options UKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions UKBD_DFLT_KEYMAP=jp.pc98 # options for uplcom: options UPLCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds # options for uvscom: options UVSCOM_DEFAULT_OPKTSIZE=8 # default output packet size options UVSCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds ##################################################################### # FireWire support device firewire # FireWire bus code device sbp # SCSI over Firewire (Requires scbus and da) device sbp_targ # SBP-2 Target mode (Requires scbus and targ) device fwe # Ethernet over FireWire (non-standard!) device fwip # IP over FireWire (RFC2734 and RFC3146) ##################################################################### # dcons support (Dumb Console Device) device dcons # dumb console driver device dcons_crom # FireWire attachment options DCONS_BUF_SIZE=16384 # buffer size options DCONS_POLL_HZ=100 # polling rate options DCONS_FORCE_CONSOLE=0 # force to be the primary console options DCONS_FORCE_GDB=1 # force to be the gdb device ##################################################################### # crypto subsystem # # This is a port of the OpenBSD crypto framework. Include this when # configuring IPSEC and when you have a h/w crypto device to accelerate # user applications that link to OpenSSL. # # Drivers are ports from OpenBSD with some simple enhancements that have # been fed back to OpenBSD. device crypto # core crypto support device cryptodev # /dev/crypto for access to h/w device rndtest # FIPS 140-2 entropy tester device hifn # Hifn 7951, 7781, etc. options HIFN_DEBUG # enable debugging support: hw.hifn.debug options HIFN_RNDTEST # enable rndtest support device ubsec # Broadcom 5501, 5601, 58xx options UBSEC_DEBUG # enable debugging support: hw.ubsec.debug options UBSEC_RNDTEST # enable rndtest support ##################################################################### # # Embedded system options: # # An embedded system might want to run something other than init. options INIT_PATH=/sbin/init:/rescue/init # Debug options options BUS_DEBUG # enable newbus debugging options DEBUG_VFS_LOCKS # enable VFS lock debugging options SOCKBUF_DEBUG # enable sockbuf last record/mb tail checking options IFMEDIA_DEBUG # enable debugging in net/if_media.c # # Verbose SYSINIT # # Make the SYSINIT process performed by mi_startup() verbose. This is very # useful when porting to a new architecture. If DDB is also enabled, this # will print function names instead of addresses. options VERBOSE_SYSINIT ##################################################################### # SYSV IPC KERNEL PARAMETERS # # Maximum number of System V semaphores that can be used on the system at # one time. options SEMMNI=11 # Total number of semaphores system wide options SEMMNS=61 # Total number of undo structures in system options SEMMNU=31 # Maximum number of System V semaphores that can be used by a single process # at one time. options SEMMSL=61 # Maximum number of operations that can be outstanding on a single System V # semaphore at one time. options SEMOPM=101 # Maximum number of undo operations that can be outstanding on a single # System V semaphore at one time. options SEMUME=11 # Maximum number of shared memory pages system wide. options SHMALL=1025 # Maximum size, in bytes, of a single System V shared memory region. options SHMMAX=(SHMMAXPGS*PAGE_SIZE+1) options SHMMAXPGS=1025 # Minimum size, in bytes, of a single System V shared memory region. options SHMMIN=2 # Maximum number of shared memory regions that can be used on the system # at one time. options SHMMNI=33 # Maximum number of System V shared memory regions that can be attached to # a single process at one time. options SHMSEG=9 # Set the amount of time (in seconds) the system will wait before # rebooting automatically when a kernel panic occurs. If set to (-1), # the system will wait indefinitely until a key is pressed on the # console. options PANIC_REBOOT_WAIT_TIME=16 # Attempt to bypass the buffer cache and put data directly into the # userland buffer for read operation when O_DIRECT flag is set on the # file. Both offset and length of the read operation must be # multiples of the physical media sector size. # options DIRECTIO # Specify a lower limit for the number of swap I/O buffers. They are # (among other things) used when bypassing the buffer cache due to # DIRECTIO kernel option enabled and O_DIRECT flag set on file. # options NSWBUF_MIN=120 ##################################################################### # More undocumented options for linting. # Note that documenting these is not considered an affront. options CAM_DEBUG_DELAY # VFS cluster debugging. options CLUSTERDEBUG options DEBUG # Kernel filelock debugging. options LOCKF_DEBUG # System V compatible message queues # Please note that the values provided here are used to test kernel # building. The defaults in the sources provide almost the same numbers. # MSGSSZ must be a power of 2 between 8 and 1024. options MSGMNB=2049 # Max number of chars in queue options MSGMNI=41 # Max number of message queue identifiers options MSGSEG=2049 # Max number of message segments options MSGSSZ=16 # Size of a message segment options MSGTQL=41 # Max number of messages in system options NBUF=512 # Number of buffer headers options SCSI_NCR_DEBUG options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SC_DEBUG_LEVEL=5 # Syscons debug level options SC_RENDER_DEBUG # syscons rendering debugging options VFS_BIO_DEBUG # VFS buffer I/O debugging options KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack options KSTACK_USAGE_PROF # Adaptec Array Controller driver options options AAC_DEBUG # Debugging levels: # 0 - quiet, only emit warnings # 1 - noisy, emit major function # points and things done # 2 - extremely noisy, emit trace # items in loops, etc. # Resource Accounting options RACCT # Resource Limits options RCTL # Yet more undocumented options for linting. # BKTR_ALLOC_PAGES has no effect except to cause warnings, and # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the # driver still mostly spells this option BROOKTREE_ALLOC_PAGES. ##options BKTR_ALLOC_PAGES=(217*4+1) options BROOKTREE_ALLOC_PAGES=(217*4+1) options MAXFILES=999 # Random number generator # Only ONE of the below two may be used; they are mutually exclusive. options RANDOM_YARROW # Yarrow CSPRNG (Default) #options RANDOM_FORTUNA # Fortuna CSPRNG options RANDOM_DEBUG # Debugging messages # Module to enable execution of application via emulators like QEMU options IMAGACT_BINMISC # zlib I/O stream support # This enables support for compressed core dumps. options GZIO Index: head/sys/conf/options =================================================================== --- head/sys/conf/options (revision 282640) +++ head/sys/conf/options (revision 282641) @@ -1,941 +1,942 @@ # $FreeBSD$ # # On the handling of kernel options # # All kernel options should be listed in NOTES, with suitable # descriptions. Negative options (options that make some code not # compile) should be commented out; LINT (generated from NOTES) should # compile as much code as possible. Try to structure option-using # code so that a single option only switch code on, or only switch # code off, to make it possible to have a full compile-test. If # necessary, you can check for COMPILING_LINT to get maximum code # coverage. # # All new options shall also be listed in either "conf/options" or # "conf/options.". Options that affect a single source-file # .[c|s] should be directed into "opt_.h", while options # that affect multiple files should either go in "opt_global.h" if # this is a kernel-wide option (used just about everywhere), or in # "opt_.h" if it affects only some files. # Note that the effect of listing only an option without a # header-file-name in conf/options (and cousins) is that the last # convention is followed. # # This handling scheme is not yet fully implemented. # # # Format of this file: # Option name filename # # If filename is missing, the default is # opt_.h AAC_DEBUG opt_aac.h AACRAID_DEBUG opt_aacraid.h AHC_ALLOW_MEMIO opt_aic7xxx.h AHC_TMODE_ENABLE opt_aic7xxx.h AHC_DUMP_EEPROM opt_aic7xxx.h AHC_DEBUG opt_aic7xxx.h AHC_DEBUG_OPTS opt_aic7xxx.h AHC_REG_PRETTY_PRINT opt_aic7xxx.h AHD_DEBUG opt_aic79xx.h AHD_DEBUG_OPTS opt_aic79xx.h AHD_TMODE_ENABLE opt_aic79xx.h AHD_REG_PRETTY_PRINT opt_aic79xx.h ADW_ALLOW_MEMIO opt_adw.h TWA_DEBUG opt_twa.h TWA_FLASH_FIRMWARE opt_twa.h # Debugging options. ALT_BREAK_TO_DEBUGGER opt_kdb.h BREAK_TO_DEBUGGER opt_kdb.h DDB DDB_BUFR_SIZE opt_ddb.h DDB_CAPTURE_DEFAULTBUFSIZE opt_ddb.h DDB_CAPTURE_MAXBUFSIZE opt_ddb.h DDB_CTF opt_ddb.h DDB_NUMSYM opt_ddb.h GDB KDB opt_global.h KDB_TRACE opt_kdb.h KDB_UNATTENDED opt_kdb.h KLD_DEBUG opt_kld.h SYSCTL_DEBUG opt_sysctl.h EARLY_PRINTF opt_global.h TEXTDUMP_PREFERRED opt_ddb.h TEXTDUMP_VERBOSE opt_ddb.h # Miscellaneous options. ADAPTIVE_LOCKMGRS ALQ ALTERA_SDCARD_FAST_SIM opt_altera_sdcard.h ATSE_CFI_HACK opt_cfi.h AUDIT opt_global.h BOOTHOWTO opt_global.h BOOTVERBOSE opt_global.h CALLOUT_PROFILING CAPABILITIES opt_capsicum.h CAPABILITY_MODE opt_capsicum.h COMPAT_43 opt_compat.h COMPAT_43TTY opt_compat.h COMPAT_FREEBSD4 opt_compat.h COMPAT_FREEBSD5 opt_compat.h COMPAT_FREEBSD6 opt_compat.h COMPAT_FREEBSD7 opt_compat.h COMPAT_FREEBSD9 opt_compat.h COMPAT_FREEBSD10 opt_compat.h COMPAT_LINUXAPI opt_compat.h COMPILING_LINT opt_global.h CY_PCI_FASTINTR DEADLKRES opt_watchdog.h DIRECTIO FILEMON opt_dontuse.h FFCLOCK FULL_PREEMPTION opt_sched.h GZIO opt_gzio.h IMAGACT_BINMISC opt_dontuse.h IPI_PREEMPTION opt_sched.h GEOM_AES opt_geom.h GEOM_BDE opt_geom.h GEOM_BSD opt_geom.h GEOM_CACHE opt_geom.h GEOM_CONCAT opt_geom.h GEOM_ELI opt_geom.h GEOM_FOX opt_geom.h GEOM_GATE opt_geom.h GEOM_JOURNAL opt_geom.h GEOM_LABEL opt_geom.h GEOM_LABEL_GPT opt_geom.h GEOM_LINUX_LVM opt_geom.h GEOM_MBR opt_geom.h GEOM_MIRROR opt_geom.h GEOM_MULTIPATH opt_geom.h GEOM_NOP opt_geom.h GEOM_PART_APM opt_geom.h GEOM_PART_BSD opt_geom.h GEOM_PART_BSD64 opt_geom.h GEOM_PART_EBR opt_geom.h GEOM_PART_EBR_COMPAT opt_geom.h GEOM_PART_GPT opt_geom.h GEOM_PART_LDM opt_geom.h GEOM_PART_MBR opt_geom.h GEOM_PART_PC98 opt_geom.h GEOM_PART_VTOC8 opt_geom.h GEOM_PC98 opt_geom.h GEOM_RAID opt_geom.h GEOM_RAID3 opt_geom.h GEOM_SHSEC opt_geom.h GEOM_STRIPE opt_geom.h GEOM_SUNLABEL opt_geom.h GEOM_UNCOMPRESS opt_geom.h GEOM_UNCOMPRESS_DEBUG opt_geom.h GEOM_UZIP opt_geom.h GEOM_VINUM opt_geom.h GEOM_VIRSTOR opt_geom.h GEOM_VOL opt_geom.h GEOM_ZERO opt_geom.h KDTRACE_HOOKS opt_global.h KDTRACE_FRAME opt_kdtrace.h KN_HASHSIZE opt_kqueue.h KSTACK_MAX_PAGES KSTACK_PAGES KSTACK_USAGE_PROF KTRACE KTRACE_REQUEST_POOL opt_ktrace.h LIBICONV MAC opt_global.h MAC_BIBA opt_dontuse.h MAC_BSDEXTENDED opt_dontuse.h MAC_IFOFF opt_dontuse.h MAC_LOMAC opt_dontuse.h MAC_MLS opt_dontuse.h MAC_NONE opt_dontuse.h MAC_PARTITION opt_dontuse.h MAC_PORTACL opt_dontuse.h MAC_SEEOTHERUIDS opt_dontuse.h MAC_STATIC opt_mac.h MAC_STUB opt_dontuse.h MAC_TEST opt_dontuse.h MD_ROOT opt_md.h MD_ROOT_FSTYPE opt_md.h MD_ROOT_SIZE opt_md.h MFI_DEBUG opt_mfi.h MFI_DECODE_LOG opt_mfi.h MPROF_BUFFERS opt_mprof.h MPROF_HASH_SIZE opt_mprof.h NEW_PCIB opt_global.h NO_ADAPTIVE_MUTEXES opt_adaptive_mutexes.h NO_ADAPTIVE_RWLOCKS NO_ADAPTIVE_SX NO_EVENTTIMERS opt_timer.h NO_SYSCTL_DESCR opt_global.h NSWBUF_MIN opt_swap.h MBUF_PACKET_ZONE_DISABLE opt_global.h PANIC_REBOOT_WAIT_TIME opt_panic.h PCI_IOV opt_global.h PPC_DEBUG opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h PPS_SYNC opt_ntp.h PREEMPTION opt_sched.h QUOTA SCHED_4BSD opt_sched.h SCHED_STATS opt_sched.h SCHED_ULE opt_sched.h SLEEPQUEUE_PROFILING SLHCI_DEBUG opt_slhci.h SPX_HACK STACK opt_stack.h SUIDDIR MSGMNB opt_sysvipc.h MSGMNI opt_sysvipc.h MSGSEG opt_sysvipc.h MSGSSZ opt_sysvipc.h MSGTQL opt_sysvipc.h SEMMNI opt_sysvipc.h SEMMNS opt_sysvipc.h SEMMNU opt_sysvipc.h SEMMSL opt_sysvipc.h SEMOPM opt_sysvipc.h SEMUME opt_sysvipc.h SHMALL opt_sysvipc.h SHMMAX opt_sysvipc.h SHMMAXPGS opt_sysvipc.h SHMMIN opt_sysvipc.h SHMMNI opt_sysvipc.h SHMSEG opt_sysvipc.h SYSVMSG opt_sysvipc.h SYSVSEM opt_sysvipc.h SYSVSHM opt_sysvipc.h SW_WATCHDOG opt_watchdog.h TURNSTILE_PROFILING UMTX_PROFILING VFS_AIO VERBOSE_SYSINIT WLCACHE opt_wavelan.h WLDEBUG opt_wavelan.h # POSIX kernel options P1003_1B_MQUEUE opt_posix.h P1003_1B_SEMAPHORES opt_posix.h _KPOSIX_PRIORITY_SCHEDULING opt_posix.h # Do we want the config file compiled into the kernel? INCLUDE_CONFIG_FILE opt_config.h # Options for static filesystems. These should only be used at config # time, since the corresponding lkms cannot work if there are any static # dependencies. Unusability is enforced by hiding the defines for the # options in a never-included header. AUTOFS opt_dontuse.h CD9660 opt_dontuse.h EXT2FS opt_dontuse.h FDESCFS opt_dontuse.h FFS opt_dontuse.h FUSE opt_dontuse.h MSDOSFS opt_dontuse.h NANDFS opt_dontuse.h NULLFS opt_dontuse.h PROCFS opt_dontuse.h PSEUDOFS opt_dontuse.h REISERFS opt_dontuse.h SMBFS opt_dontuse.h TMPFS opt_dontuse.h UDF opt_dontuse.h UNIONFS opt_dontuse.h ZFS opt_dontuse.h # Pseudofs debugging PSEUDOFS_TRACE opt_pseudofs.h # In-kernel GSS-API KGSSAPI opt_kgssapi.h KGSSAPI_DEBUG opt_kgssapi.h # These static filesystems have one slightly bogus static dependency in # sys/i386/i386/autoconf.c. If any of these filesystems are # statically compiled into the kernel, code for mounting them as root # filesystems will be enabled - but look below. # NFSCL - client # NFSD - server NFSCL opt_nfs.h NFSD opt_nfs.h # filesystems and libiconv bridge CD9660_ICONV opt_dontuse.h MSDOSFS_ICONV opt_dontuse.h UDF_ICONV opt_dontuse.h # If you are following the conditions in the copyright, # you can enable soft-updates which will speed up a lot of thigs # and make the system safer from crashes at the same time. # otherwise a STUB module will be compiled in. SOFTUPDATES opt_ffs.h # On small, embedded systems, it can be useful to turn off support for # snapshots. It saves about 30-40k for a feature that would be lightly # used, if it is used at all. NO_FFS_SNAPSHOT opt_ffs.h # Enabling this option turns on support for Access Control Lists in UFS, # which can be used to support high security configurations. Depends on # UFS_EXTATTR. UFS_ACL opt_ufs.h # Enabling this option turns on support for extended attributes in UFS-based # filesystems, which can be used to support high security configurations # as well as new filesystem features. UFS_EXTATTR opt_ufs.h UFS_EXTATTR_AUTOSTART opt_ufs.h # Enable fast hash lookups for large directories on UFS-based filesystems. UFS_DIRHASH opt_ufs.h # Enable gjournal-based UFS journal. UFS_GJOURNAL opt_ufs.h # The below sentence is not in English, and neither is this one. # We plan to remove the static dependences above, with a # _ROOT option to control if it usable as root. This list # allows these options to be present in config files already (though # they won't make any difference yet). NFS_ROOT opt_nfsroot.h # SMB/CIFS requester NETSMB opt_netsmb.h # Options used only in subr_param.c. HZ opt_param.h MAXFILES opt_param.h NBUF opt_param.h NSFBUFS opt_param.h VM_BCACHE_SIZE_MAX opt_param.h VM_SWZONE_SIZE_MAX opt_param.h MAXUSERS DFLDSIZ opt_param.h MAXDSIZ opt_param.h MAXSSIZ opt_param.h # Generic SCSI options. CAM_MAX_HIGHPOWER opt_cam.h CAMDEBUG opt_cam.h CAM_DEBUG_COMPILE opt_cam.h CAM_DEBUG_DELAY opt_cam.h CAM_DEBUG_BUS opt_cam.h CAM_DEBUG_TARGET opt_cam.h CAM_DEBUG_LUN opt_cam.h CAM_DEBUG_FLAGS opt_cam.h CAM_BOOT_DELAY opt_cam.h SCSI_DELAY opt_scsi.h SCSI_NO_SENSE_STRINGS opt_scsi.h SCSI_NO_OP_STRINGS opt_scsi.h # Options used only in cam/ata/ata_da.c ADA_TEST_FAILURE opt_ada.h ATA_STATIC_ID opt_ada.h # Options used only in cam/scsi/scsi_cd.c CHANGER_MIN_BUSY_SECONDS opt_cd.h CHANGER_MAX_BUSY_SECONDS opt_cd.h # Options used only in cam/scsi/scsi_sa.c. SA_IO_TIMEOUT opt_sa.h SA_SPACE_TIMEOUT opt_sa.h SA_REWIND_TIMEOUT opt_sa.h SA_ERASE_TIMEOUT opt_sa.h SA_1FM_AT_EOD opt_sa.h # Options used only in cam/scsi/scsi_pt.c SCSI_PT_DEFAULT_TIMEOUT opt_pt.h # Options used only in cam/scsi/scsi_ses.c SES_ENABLE_PASSTHROUGH opt_ses.h # Options used in dev/sym/ (Symbios SCSI driver). SYM_SETUP_LP_PROBE_MAP opt_sym.h #-Low Priority Probe Map (bits) # Allows the ncr to take precedence # 1 (1<<0) -> 810a, 860 # 2 (1<<1) -> 825a, 875, 885, 895 # 4 (1<<2) -> 895a, 896, 1510d SYM_SETUP_SCSI_DIFF opt_sym.h #-HVD support for 825a, 875, 885 # disabled:0 (default), enabled:1 SYM_SETUP_PCI_PARITY opt_sym.h #-PCI parity checking # disabled:0, enabled:1 (default) SYM_SETUP_MAX_LUN opt_sym.h #-Number of LUNs supported # default:8, range:[1..64] # Options used only in dev/ncr/* SCSI_NCR_DEBUG opt_ncr.h SCSI_NCR_MAX_SYNC opt_ncr.h SCSI_NCR_MAX_WIDE opt_ncr.h SCSI_NCR_MYADDR opt_ncr.h # Options used only in dev/isp/* ISP_TARGET_MODE opt_isp.h ISP_FW_CRASH_DUMP opt_isp.h ISP_DEFAULT_ROLES opt_isp.h ISP_INTERNAL_TARGET opt_isp.h # Options used only in dev/iscsi ISCSI_INITIATOR_DEBUG opt_iscsi_initiator.h # Net stuff. ACCEPT_FILTER_DATA ACCEPT_FILTER_DNS ACCEPT_FILTER_HTTP ALTQ opt_global.h ALTQ_CBQ opt_altq.h ALTQ_CDNR opt_altq.h ALTQ_DEBUG opt_altq.h ALTQ_HFSC opt_altq.h ALTQ_NOPCC opt_altq.h ALTQ_PRIQ opt_altq.h ALTQ_RED opt_altq.h ALTQ_RIO opt_altq.h BOOTP opt_bootp.h BOOTP_BLOCKSIZE opt_bootp.h BOOTP_COMPAT opt_bootp.h BOOTP_NFSROOT opt_bootp.h BOOTP_NFSV3 opt_bootp.h BOOTP_WIRED_TO opt_bootp.h DEVICE_POLLING DUMMYNET opt_ipdn.h INET opt_inet.h INET6 opt_inet6.h IPDIVERT IPFILTER opt_ipfilter.h IPFILTER_DEFAULT_BLOCK opt_ipfilter.h IPFILTER_LOG opt_ipfilter.h IPFILTER_LOOKUP opt_ipfilter.h IPFIREWALL opt_ipfw.h IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h IPFIREWALL_NAT opt_ipfw.h IPFIREWALL_VERBOSE opt_ipfw.h IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_FILTERTUNNEL opt_ipsec.h IPSEC_NAT_T opt_ipsec.h IPSTEALTH KRPC LIBALIAS LIBMBPOOL LIBMCHAIN MBUF_PROFILING MBUF_STRESS_TEST MROUTING opt_mrouting.h NFSLOCKD PCBGROUP opt_pcbgroup.h PF_DEFAULT_TO_DROP opt_pf.h RADIX_MPATH opt_mpath.h ROUTETABLES opt_route.h RSS opt_rss.h SLIP_IFF_OPTS opt_slip.h TCPDEBUG SIFTR TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading TCP_SIGNATURE opt_inet.h VLAN_ARRAY opt_vlan.h XBONEHACK FLOWTABLE opt_route.h FLOWTABLE_HASH_ALL opt_route.h # # SCTP # SCTP opt_sctp.h SCTP_DEBUG opt_sctp.h # Enable debug printfs SCTP_WITH_NO_CSUM opt_sctp.h # Use this at your peril SCTP_LOCK_LOGGING opt_sctp.h # Log to KTR lock activity SCTP_MBUF_LOGGING opt_sctp.h # Log to KTR general mbuf aloc/free SCTP_MBCNT_LOGGING opt_sctp.h # Log to KTR mbcnt activity SCTP_PACKET_LOGGING opt_sctp.h # Log to a packet buffer last N packets SCTP_LTRACE_CHUNKS opt_sctp.h # Log to KTR chunks processed SCTP_LTRACE_ERRORS opt_sctp.h # Log to KTR error returns. SCTP_USE_PERCPU_STAT opt_sctp.h # Use per cpu stats. SCTP_MCORE_INPUT opt_sctp.h # Have multiple input threads for input mbufs SCTP_LOCAL_TRACE_BUF opt_sctp.h # Use tracebuffer exported via sysctl SCTP_DETAILED_STR_STATS opt_sctp.h # Use per PR-SCTP policy stream stats # # # # Netgraph(4). Use option NETGRAPH to enable the base netgraph code. # Each netgraph node type can be either be compiled into the kernel # or loaded dynamically. To get the former, include the corresponding # option below. Each type has its own man page, e.g. ng_async(4). NETGRAPH NETGRAPH_DEBUG opt_netgraph.h NETGRAPH_ASYNC opt_netgraph.h NETGRAPH_ATMLLC opt_netgraph.h NETGRAPH_ATM_ATMPIF opt_netgraph.h NETGRAPH_BLUETOOTH opt_netgraph.h NETGRAPH_BLUETOOTH_BT3C opt_netgraph.h NETGRAPH_BLUETOOTH_H4 opt_netgraph.h NETGRAPH_BLUETOOTH_HCI opt_netgraph.h NETGRAPH_BLUETOOTH_L2CAP opt_netgraph.h NETGRAPH_BLUETOOTH_SOCKET opt_netgraph.h NETGRAPH_BLUETOOTH_UBT opt_netgraph.h NETGRAPH_BLUETOOTH_UBTBCMFW opt_netgraph.h NETGRAPH_BPF opt_netgraph.h NETGRAPH_BRIDGE opt_netgraph.h NETGRAPH_CAR opt_netgraph.h NETGRAPH_CISCO opt_netgraph.h NETGRAPH_DEFLATE opt_netgraph.h NETGRAPH_DEVICE opt_netgraph.h NETGRAPH_ECHO opt_netgraph.h NETGRAPH_EIFACE opt_netgraph.h NETGRAPH_ETHER opt_netgraph.h NETGRAPH_ETHER_ECHO opt_netgraph.h NETGRAPH_FEC opt_netgraph.h NETGRAPH_FRAME_RELAY opt_netgraph.h NETGRAPH_GIF opt_netgraph.h NETGRAPH_GIF_DEMUX opt_netgraph.h NETGRAPH_HOLE opt_netgraph.h NETGRAPH_IFACE opt_netgraph.h NETGRAPH_IP_INPUT opt_netgraph.h NETGRAPH_IPFW opt_netgraph.h NETGRAPH_KSOCKET opt_netgraph.h NETGRAPH_L2TP opt_netgraph.h NETGRAPH_LMI opt_netgraph.h # MPPC compression requires proprietary files (not included) NETGRAPH_MPPC_COMPRESSION opt_netgraph.h NETGRAPH_MPPC_ENCRYPTION opt_netgraph.h NETGRAPH_NAT opt_netgraph.h NETGRAPH_NETFLOW opt_netgraph.h NETGRAPH_ONE2MANY opt_netgraph.h NETGRAPH_PATCH opt_netgraph.h NETGRAPH_PIPE opt_netgraph.h NETGRAPH_PPP opt_netgraph.h NETGRAPH_PPPOE opt_netgraph.h NETGRAPH_PPTPGRE opt_netgraph.h NETGRAPH_PRED1 opt_netgraph.h NETGRAPH_RFC1490 opt_netgraph.h NETGRAPH_SOCKET opt_netgraph.h NETGRAPH_SPLIT opt_netgraph.h NETGRAPH_SPPP opt_netgraph.h NETGRAPH_TAG opt_netgraph.h NETGRAPH_TCPMSS opt_netgraph.h NETGRAPH_TEE opt_netgraph.h NETGRAPH_TTY opt_netgraph.h NETGRAPH_UI opt_netgraph.h NETGRAPH_VJC opt_netgraph.h NETGRAPH_VLAN opt_netgraph.h # NgATM options NGATM_ATM opt_netgraph.h NGATM_ATMBASE opt_netgraph.h NGATM_SSCOP opt_netgraph.h NGATM_SSCFU opt_netgraph.h NGATM_UNI opt_netgraph.h NGATM_CCATM opt_netgraph.h # DRM options DRM_DEBUG opt_drm.h TI_SF_BUF_JUMBO opt_ti.h TI_JUMBO_HDRSPLIT opt_ti.h # XXX Conflict: # of devices vs network protocol (Native ATM). # This makes "atm.h" unusable. NATM # DPT driver debug flags DPT_MEASURE_PERFORMANCE opt_dpt.h DPT_RESET_HBA opt_dpt.h # Misc debug flags. Most of these should probably be replaced with # 'DEBUG', and then let people recompile just the interesting modules # with 'make CC="cc -DDEBUG"'. CLUSTERDEBUG opt_debug_cluster.h DEBUG_1284 opt_ppb_1284.h VP0_DEBUG opt_vpo.h LPT_DEBUG opt_lpt.h PLIP_DEBUG opt_plip.h LOCKF_DEBUG opt_debug_lockf.h SI_DEBUG opt_debug_si.h IFMEDIA_DEBUG opt_ifmedia.h # Fb options FB_DEBUG opt_fb.h FB_INSTALL_CDEV opt_fb.h # ppbus related options PERIPH_1284 opt_ppb_1284.h DONTPROBE_1284 opt_ppb_1284.h # smbus related options ENABLE_ALART opt_intpm.h # These cause changes all over the kernel BLKDEV_IOSIZE opt_global.h BURN_BRIDGES opt_global.h DEBUG opt_global.h DEBUG_LOCKS opt_global.h DEBUG_VFS_LOCKS opt_global.h DFLTPHYS opt_global.h DIAGNOSTIC opt_global.h INVARIANT_SUPPORT opt_global.h INVARIANTS opt_global.h MAXCPU opt_global.h MAXMEMDOM opt_global.h MAXPHYS opt_global.h MCLSHIFT opt_global.h MUTEX_DEBUG opt_global.h MUTEX_NOINLINE opt_global.h LOCK_PROFILING opt_global.h LOCK_PROFILING_FAST opt_global.h MSIZE opt_global.h REGRESSION opt_global.h RWLOCK_NOINLINE opt_global.h SX_NOINLINE opt_global.h VFS_BIO_DEBUG opt_global.h # These are VM related options VM_KMEM_SIZE opt_vm.h VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h VM_NRESERVLEVEL opt_vm.h VM_LEVEL_0_ORDER opt_vm.h NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h MALLOC_DEBUG_MAXZONES opt_vm.h # The MemGuard replacement allocator used for tamper-after-free detection DEBUG_MEMGUARD opt_vm.h # The RedZone malloc(9) protection DEBUG_REDZONE opt_vm.h # Standard SMP options SMP opt_global.h # Size of the kernel message buffer MSGBUF_SIZE opt_msgbuf.h # NFS options NFS_MINATTRTIMO opt_nfs.h NFS_MAXATTRTIMO opt_nfs.h NFS_MINDIRATTRTIMO opt_nfs.h NFS_MAXDIRATTRTIMO opt_nfs.h NFS_DEBUG opt_nfs.h # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver OVERRIDE_CARD opt_bktr.h OVERRIDE_TUNER opt_bktr.h OVERRIDE_DBX opt_bktr.h OVERRIDE_MSP opt_bktr.h BROOKTREE_SYSTEM_DEFAULT opt_bktr.h BROOKTREE_ALLOC_PAGES opt_bktr.h BKTR_OVERRIDE_CARD opt_bktr.h BKTR_OVERRIDE_TUNER opt_bktr.h BKTR_OVERRIDE_DBX opt_bktr.h BKTR_OVERRIDE_MSP opt_bktr.h BKTR_SYSTEM_DEFAULT opt_bktr.h BKTR_ALLOC_PAGES opt_bktr.h BKTR_USE_PLL opt_bktr.h BKTR_GPIO_ACCESS opt_bktr.h BKTR_NO_MSP_RESET opt_bktr.h BKTR_430_FX_MODE opt_bktr.h BKTR_SIS_VIA_MODE opt_bktr.h BKTR_USE_FREEBSD_SMBUS opt_bktr.h BKTR_NEW_MSP34XX_DRIVER opt_bktr.h # Options for uart(4) UART_PPS_ON_CTS opt_uart.h UART_POLL_FREQ opt_uart.h # options for bus/device framework BUS_DEBUG opt_bus.h # options for USB support USB_DEBUG opt_usb.h USB_HOST_ALIGN opt_usb.h USB_REQ_DEBUG opt_usb.h USB_TEMPLATE opt_usb.h USB_VERBOSE opt_usb.h USB_DMA_SINGLE_ALLOC opt_usb.h USB_EHCI_BIG_ENDIAN_DESC opt_usb.h U3G_DEBUG opt_u3g.h UKBD_DFLT_KEYMAP opt_ukbd.h UPLCOM_INTR_INTERVAL opt_uplcom.h UVSCOM_DEFAULT_OPKTSIZE opt_uvscom.h UVSCOM_INTR_INTERVAL opt_uvscom.h # Embedded system options INIT_PATH ROOTDEVNAME FDC_DEBUG opt_fdc.h PCFCLOCK_VERBOSE opt_pcfclock.h PCFCLOCK_MAX_RETRIES opt_pcfclock.h KTR opt_global.h KTR_ALQ opt_ktr.h KTR_MASK opt_ktr.h KTR_CPUMASK opt_ktr.h KTR_COMPILE opt_global.h KTR_BOOT_ENTRIES opt_global.h KTR_ENTRIES opt_global.h KTR_VERBOSE opt_ktr.h WITNESS opt_global.h WITNESS_KDB opt_witness.h WITNESS_NO_VNODE opt_witness.h WITNESS_SKIPSPIN opt_witness.h WITNESS_COUNT opt_witness.h OPENSOLARIS_WITNESS opt_global.h # options for ACPI support ACPI_DEBUG opt_acpi.h ACPI_MAX_TASKS opt_acpi.h ACPI_MAX_THREADS opt_acpi.h ACPI_DMAR opt_acpi.h DEV_ACPI opt_acpi.h # ISA support DEV_ISA opt_isa.h ISAPNP opt_isa.h # various 'device presence' options. DEV_BPF opt_bpf.h DEV_CARP opt_carp.h DEV_ENC opt_enc.h DEV_MCA opt_mca.h DEV_NETMAP opt_global.h DEV_PCI opt_pci.h DEV_PF opt_pf.h DEV_PFLOG opt_pf.h DEV_PFSYNC opt_pf.h DEV_SPLASH opt_splash.h DEV_VLAN opt_vlan.h # EISA support DEV_EISA opt_eisa.h EISA_SLOTS opt_eisa.h # ed driver ED_HPP opt_ed.h ED_3C503 opt_ed.h ED_SIC opt_ed.h # bce driver BCE_DEBUG opt_bce.h BCE_NVRAM_WRITE_SUPPORT opt_bce.h SOCKBUF_DEBUG opt_global.h # options for ubsec driver UBSEC_DEBUG opt_ubsec.h UBSEC_RNDTEST opt_ubsec.h UBSEC_NO_RNG opt_ubsec.h # options for hifn driver HIFN_DEBUG opt_hifn.h HIFN_RNDTEST opt_hifn.h # options for safenet driver SAFE_DEBUG opt_safe.h SAFE_NO_RNG opt_safe.h SAFE_RNDTEST opt_safe.h # syscons/vt options MAXCONS opt_syscons.h SC_ALT_MOUSE_IMAGE opt_syscons.h SC_CUT_SPACES2TABS opt_syscons.h SC_CUT_SEPCHARS opt_syscons.h SC_DEBUG_LEVEL opt_syscons.h SC_DFLT_FONT opt_syscons.h SC_DISABLE_KDBKEY opt_syscons.h SC_DISABLE_REBOOT opt_syscons.h SC_HISTORY_SIZE opt_syscons.h SC_KERNEL_CONS_ATTR opt_syscons.h SC_KERNEL_CONS_REV_ATTR opt_syscons.h SC_MOUSE_CHAR opt_syscons.h SC_NO_CUTPASTE opt_syscons.h SC_NO_FONT_LOADING opt_syscons.h SC_NO_HISTORY opt_syscons.h SC_NO_MODE_CHANGE opt_syscons.h SC_NO_SUSPEND_VTYSWITCH opt_syscons.h SC_NO_SYSMOUSE opt_syscons.h SC_NORM_ATTR opt_syscons.h SC_NORM_REV_ATTR opt_syscons.h SC_PIXEL_MODE opt_syscons.h SC_RENDER_DEBUG opt_syscons.h SC_TWOBUTTON_MOUSE opt_syscons.h VT_ALT_TO_ESC_HACK opt_syscons.h VT_FB_DEFAULT_WIDTH opt_syscons.h VT_FB_DEFAULT_HEIGHT opt_syscons.h VT_MAXWINDOWS opt_syscons.h VT_TWOBUTTON_MOUSE opt_syscons.h DEV_SC opt_syscons.h DEV_VT opt_syscons.h # teken terminal emulator options TEKEN_CONS25 opt_teken.h TEKEN_UTF8 opt_teken.h TERMINAL_KERN_ATTR opt_teken.h TERMINAL_NORM_ATTR opt_teken.h # options for printf PRINTF_BUFR_SIZE opt_printf.h # kbd options KBD_DISABLE_KEYMAP_LOAD opt_kbd.h KBD_INSTALL_CDEV opt_kbd.h KBD_MAXRETRY opt_kbd.h KBD_MAXWAIT opt_kbd.h KBD_RESETDELAY opt_kbd.h KBDIO_DEBUG opt_kbd.h # options for the Atheros driver ATH_DEBUG opt_ath.h ATH_TXBUF opt_ath.h ATH_RXBUF opt_ath.h ATH_DIAGAPI opt_ath.h ATH_TX99_DIAG opt_ath.h ATH_ENABLE_11N opt_ath.h ATH_ENABLE_DFS opt_ath.h ATH_EEPROM_FIRMWARE opt_ath.h ATH_ENABLE_RADIOTAP_VENDOR_EXT opt_ath.h ATH_DEBUG_ALQ opt_ath.h ATH_KTR_INTR_DEBUG opt_ath.h # options for the Atheros hal AH_SUPPORT_AR5416 opt_ah.h # XXX For now, this breaks non-AR9130 chipsets, so only use it # XXX when actually targetting AR9130. AH_SUPPORT_AR9130 opt_ah.h # This is required for AR933x SoC support AH_SUPPORT_AR9330 opt_ah.h AH_SUPPORT_AR9340 opt_ah.h AH_SUPPORT_QCA9550 opt_ah.h AH_DEBUG opt_ah.h AH_ASSERT opt_ah.h AH_DEBUG_ALQ opt_ah.h AH_REGOPS_FUNC opt_ah.h AH_WRITE_REGDOMAIN opt_ah.h AH_DEBUG_COUNTRY opt_ah.h AH_WRITE_EEPROM opt_ah.h AH_PRIVATE_DIAG opt_ah.h AH_NEED_DESC_SWAP opt_ah.h AH_USE_INIPDGAIN opt_ah.h AH_MAXCHAN opt_ah.h AH_RXCFG_SDMAMW_4BYTES opt_ah.h AH_INTERRUPT_DEBUGGING opt_ah.h # AR5416 and later interrupt mitigation # XXX do not use this for AR9130 AH_AR5416_INTERRUPT_MITIGATION opt_ah.h # options for the Broadcom BCM43xx driver (bwi) BWI_DEBUG opt_bwi.h BWI_DEBUG_VERBOSE opt_bwi.h # options for the Marvell 8335 wireless driver MALO_DEBUG opt_malo.h MALO_TXBUF opt_malo.h MALO_RXBUF opt_malo.h # options for the Marvell wireless driver MWL_DEBUG opt_mwl.h MWL_TXBUF opt_mwl.h MWL_RXBUF opt_mwl.h MWL_DIAGAPI opt_mwl.h MWL_AGGR_SIZE opt_mwl.h MWL_TX_NODROP opt_mwl.h # Options for the Intel 802.11n wireless driver IWN_DEBUG opt_iwn.h # Options for the Intel 3945ABG wireless driver WPI_DEBUG opt_wpi.h # dcons options DCONS_BUF_SIZE opt_dcons.h DCONS_POLL_HZ opt_dcons.h DCONS_FORCE_CONSOLE opt_dcons.h DCONS_FORCE_GDB opt_dcons.h # HWPMC options +HWPMC_DEBUG opt_global.h HWPMC_HOOKS HWPMC_MIPS_BACKTRACE opt_hwpmc_hooks.h # XBOX options for FreeBSD/i386, but some files are MI XBOX opt_xbox.h # Interrupt filtering INTR_FILTER # 802.11 support layer IEEE80211_DEBUG opt_wlan.h IEEE80211_DEBUG_REFCNT opt_wlan.h IEEE80211_AMPDU_AGE opt_wlan.h IEEE80211_SUPPORT_MESH opt_wlan.h IEEE80211_SUPPORT_SUPERG opt_wlan.h IEEE80211_SUPPORT_TDMA opt_wlan.h IEEE80211_ALQ opt_wlan.h IEEE80211_DFS_DEBUG opt_wlan.h # 802.11 TDMA support TDMA_SLOTLEN_DEFAULT opt_tdma.h TDMA_SLOTCNT_DEFAULT opt_tdma.h TDMA_BINTVAL_DEFAULT opt_tdma.h TDMA_TXRATE_11B_DEFAULT opt_tdma.h TDMA_TXRATE_11G_DEFAULT opt_tdma.h TDMA_TXRATE_11A_DEFAULT opt_tdma.h TDMA_TXRATE_TURBO_DEFAULT opt_tdma.h TDMA_TXRATE_HALF_DEFAULT opt_tdma.h TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h TDMA_TXRATE_11NA_DEFAULT opt_tdma.h TDMA_TXRATE_11NG_DEFAULT opt_tdma.h # VideoMode PICKMODE_DEBUG opt_videomode.h # Network stack virtualization options VIMAGE opt_global.h VNET_DEBUG opt_global.h # Common Flash Interface (CFI) options CFI_SUPPORT_STRATAFLASH opt_cfi.h CFI_ARMEDANDDANGEROUS opt_cfi.h # Sound options SND_DEBUG opt_snd.h SND_DIAGNOSTIC opt_snd.h SND_FEEDER_MULTIFORMAT opt_snd.h SND_FEEDER_FULL_MULTIFORMAT opt_snd.h SND_FEEDER_RATE_HP opt_snd.h SND_PCM_64 opt_snd.h SND_OLDSTEREO opt_snd.h X86BIOS # Flattened device tree options FDT opt_platform.h FDT_DTB_STATIC opt_platform.h # OFED Infiniband stack OFED opt_ofed.h OFED_DEBUG_INIT opt_ofed.h SDP opt_ofed.h SDP_DEBUG opt_ofed.h IPOIB opt_ofed.h IPOIB_DEBUG opt_ofed.h IPOIB_CM opt_ofed.h # Resource Accounting RACCT opt_global.h RACCT_DISABLED opt_global.h # Resource Limits RCTL opt_global.h # Random number generator(s) RANDOM_YARROW opt_random.h RANDOM_FORTUNA opt_random.h RANDOM_DEBUG opt_random.h Index: head/sys/dev/hwpmc/hwpmc_amd.c =================================================================== --- head/sys/dev/hwpmc/hwpmc_amd.c (revision 282640) +++ head/sys/dev/hwpmc/hwpmc_amd.c (revision 282641) @@ -1,1033 +1,1033 @@ /*- * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* Support for the AMD K7 and later processors */ #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef DEBUG +#ifdef HWPMC_DEBUG enum pmc_class amd_pmc_class; #endif /* AMD K7 & K8 PMCs */ struct amd_descr { struct pmc_descr pm_descr; /* "base class" */ uint32_t pm_evsel; /* address of EVSEL register */ uint32_t pm_perfctr; /* address of PERFCTR register */ }; static struct amd_descr amd_pmcdesc[AMD_NPMCS] = { { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_0, .pm_perfctr = AMD_PMC_PERFCTR_0 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_1, .pm_perfctr = AMD_PMC_PERFCTR_1 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_2, .pm_perfctr = AMD_PMC_PERFCTR_2 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_3, .pm_perfctr = AMD_PMC_PERFCTR_3 } }; struct amd_event_code_map { enum pmc_event pe_ev; /* enum value */ uint8_t pe_code; /* encoded event mask */ uint8_t pe_mask; /* bits allowed in unit mask */ }; const struct amd_event_code_map amd_event_codes[] = { #if defined(__i386__) /* 32 bit Athlon (K7) only */ { PMC_EV_K7_DC_ACCESSES, 0x40, 0 }, { PMC_EV_K7_DC_MISSES, 0x41, 0 }, { PMC_EV_K7_DC_REFILLS_FROM_L2, 0x42, AMD_PMC_UNITMASK_MOESI }, { PMC_EV_K7_DC_REFILLS_FROM_SYSTEM, 0x43, AMD_PMC_UNITMASK_MOESI }, { PMC_EV_K7_DC_WRITEBACKS, 0x44, AMD_PMC_UNITMASK_MOESI }, { PMC_EV_K7_L1_DTLB_MISS_AND_L2_DTLB_HITS, 0x45, 0 }, { PMC_EV_K7_L1_AND_L2_DTLB_MISSES, 0x46, 0 }, { PMC_EV_K7_MISALIGNED_REFERENCES, 0x47, 0 }, { PMC_EV_K7_IC_FETCHES, 0x80, 0 }, { PMC_EV_K7_IC_MISSES, 0x81, 0 }, { PMC_EV_K7_L1_ITLB_MISSES, 0x84, 0 }, { PMC_EV_K7_L1_L2_ITLB_MISSES, 0x85, 0 }, { PMC_EV_K7_RETIRED_INSTRUCTIONS, 0xC0, 0 }, { PMC_EV_K7_RETIRED_OPS, 0xC1, 0 }, { PMC_EV_K7_RETIRED_BRANCHES, 0xC2, 0 }, { PMC_EV_K7_RETIRED_BRANCHES_MISPREDICTED, 0xC3, 0 }, { PMC_EV_K7_RETIRED_TAKEN_BRANCHES, 0xC4, 0 }, { PMC_EV_K7_RETIRED_TAKEN_BRANCHES_MISPREDICTED, 0xC5, 0 }, { PMC_EV_K7_RETIRED_FAR_CONTROL_TRANSFERS, 0xC6, 0 }, { PMC_EV_K7_RETIRED_RESYNC_BRANCHES, 0xC7, 0 }, { PMC_EV_K7_INTERRUPTS_MASKED_CYCLES, 0xCD, 0 }, { PMC_EV_K7_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, 0xCE, 0 }, { PMC_EV_K7_HARDWARE_INTERRUPTS, 0xCF, 0 }, #endif { PMC_EV_K8_FP_DISPATCHED_FPU_OPS, 0x00, 0x3F }, { PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED, 0x01, 0x00 }, { PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS, 0x02, 0x00 }, { PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD, 0x20, 0x7F }, { PMC_EV_K8_LS_MICROARCHITECTURAL_RESYNC_BY_SELF_MODIFYING_CODE, 0x21, 0x00 }, { PMC_EV_K8_LS_MICROARCHITECTURAL_RESYNC_BY_SNOOP, 0x22, 0x00 }, { PMC_EV_K8_LS_BUFFER2_FULL, 0x23, 0x00 }, { PMC_EV_K8_LS_LOCKED_OPERATION, 0x24, 0x07 }, { PMC_EV_K8_LS_MICROARCHITECTURAL_LATE_CANCEL, 0x25, 0x00 }, { PMC_EV_K8_LS_RETIRED_CFLUSH_INSTRUCTIONS, 0x26, 0x00 }, { PMC_EV_K8_LS_RETIRED_CPUID_INSTRUCTIONS, 0x27, 0x00 }, { PMC_EV_K8_DC_ACCESS, 0x40, 0x00 }, { PMC_EV_K8_DC_MISS, 0x41, 0x00 }, { PMC_EV_K8_DC_REFILL_FROM_L2, 0x42, 0x1F }, { PMC_EV_K8_DC_REFILL_FROM_SYSTEM, 0x43, 0x1F }, { PMC_EV_K8_DC_COPYBACK, 0x44, 0x1F }, { PMC_EV_K8_DC_L1_DTLB_MISS_AND_L2_DTLB_HIT, 0x45, 0x00 }, { PMC_EV_K8_DC_L1_DTLB_MISS_AND_L2_DTLB_MISS, 0x46, 0x00 }, { PMC_EV_K8_DC_MISALIGNED_DATA_REFERENCE, 0x47, 0x00 }, { PMC_EV_K8_DC_MICROARCHITECTURAL_LATE_CANCEL, 0x48, 0x00 }, { PMC_EV_K8_DC_MICROARCHITECTURAL_EARLY_CANCEL, 0x49, 0x00 }, { PMC_EV_K8_DC_ONE_BIT_ECC_ERROR, 0x4A, 0x03 }, { PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS, 0x4B, 0x07 }, { PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS, 0x4C, 0x03 }, { PMC_EV_K8_BU_CPU_CLK_UNHALTED, 0x76, 0x00 }, { PMC_EV_K8_BU_INTERNAL_L2_REQUEST, 0x7D, 0x1F }, { PMC_EV_K8_BU_FILL_REQUEST_L2_MISS, 0x7E, 0x07 }, { PMC_EV_K8_BU_FILL_INTO_L2, 0x7F, 0x03 }, { PMC_EV_K8_IC_FETCH, 0x80, 0x00 }, { PMC_EV_K8_IC_MISS, 0x81, 0x00 }, { PMC_EV_K8_IC_REFILL_FROM_L2, 0x82, 0x00 }, { PMC_EV_K8_IC_REFILL_FROM_SYSTEM, 0x83, 0x00 }, { PMC_EV_K8_IC_L1_ITLB_MISS_AND_L2_ITLB_HIT, 0x84, 0x00 }, { PMC_EV_K8_IC_L1_ITLB_MISS_AND_L2_ITLB_MISS, 0x85, 0x00 }, { PMC_EV_K8_IC_MICROARCHITECTURAL_RESYNC_BY_SNOOP, 0x86, 0x00 }, { PMC_EV_K8_IC_INSTRUCTION_FETCH_STALL, 0x87, 0x00 }, { PMC_EV_K8_IC_RETURN_STACK_HIT, 0x88, 0x00 }, { PMC_EV_K8_IC_RETURN_STACK_OVERFLOW, 0x89, 0x00 }, { PMC_EV_K8_FR_RETIRED_X86_INSTRUCTIONS, 0xC0, 0x00 }, { PMC_EV_K8_FR_RETIRED_UOPS, 0xC1, 0x00 }, { PMC_EV_K8_FR_RETIRED_BRANCHES, 0xC2, 0x00 }, { PMC_EV_K8_FR_RETIRED_BRANCHES_MISPREDICTED, 0xC3, 0x00 }, { PMC_EV_K8_FR_RETIRED_TAKEN_BRANCHES, 0xC4, 0x00 }, { PMC_EV_K8_FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED, 0xC5, 0x00 }, { PMC_EV_K8_FR_RETIRED_FAR_CONTROL_TRANSFERS, 0xC6, 0x00 }, { PMC_EV_K8_FR_RETIRED_RESYNCS, 0xC7, 0x00 }, { PMC_EV_K8_FR_RETIRED_NEAR_RETURNS, 0xC8, 0x00 }, { PMC_EV_K8_FR_RETIRED_NEAR_RETURNS_MISPREDICTED, 0xC9, 0x00 }, { PMC_EV_K8_FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED_BY_ADDR_MISCOMPARE, 0xCA, 0x00 }, { PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS, 0xCB, 0x0F }, { PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS, 0xCC, 0x07 }, { PMC_EV_K8_FR_INTERRUPTS_MASKED_CYCLES, 0xCD, 0x00 }, { PMC_EV_K8_FR_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, 0xCE, 0x00 }, { PMC_EV_K8_FR_TAKEN_HARDWARE_INTERRUPTS, 0xCF, 0x00 }, { PMC_EV_K8_FR_DECODER_EMPTY, 0xD0, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALLS, 0xD1, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_FROM_BRANCH_ABORT_TO_RETIRE, 0xD2, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_FOR_SERIALIZATION, 0xD3, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_FOR_SEGMENT_LOAD, 0xD4, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_REORDER_BUFFER_IS_FULL, 0xD5, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_RESERVATION_STATIONS_ARE_FULL, 0xD6, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_FPU_IS_FULL, 0xD7, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_LS_IS_FULL, 0xD8, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_WAITING_FOR_ALL_TO_BE_QUIET, 0xD9, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_FAR_XFER_OR_RESYNC_BRANCH_PENDING, 0xDA, 0x00 }, { PMC_EV_K8_FR_FPU_EXCEPTIONS, 0xDB, 0x0F }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR0, 0xDC, 0x00 }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR1, 0xDD, 0x00 }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR2, 0xDE, 0x00 }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR3, 0xDF, 0x00 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT, 0xE0, 0x7 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOW, 0xE1, 0x00 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED, 0xE2, 0x00 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND, 0xE3, 0x07 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION, 0xE4, 0x0F }, { PMC_EV_K8_NB_SIZED_COMMANDS, 0xEB, 0x7F }, { PMC_EV_K8_NB_PROBE_RESULT, 0xEC, 0x0F }, { PMC_EV_K8_NB_HT_BUS0_BANDWIDTH, 0xF6, 0x0F }, { PMC_EV_K8_NB_HT_BUS1_BANDWIDTH, 0xF7, 0x0F }, { PMC_EV_K8_NB_HT_BUS2_BANDWIDTH, 0xF8, 0x0F } }; const int amd_event_codes_size = sizeof(amd_event_codes) / sizeof(amd_event_codes[0]); /* * Per-processor information */ struct amd_cpu { struct pmc_hw pc_amdpmcs[AMD_NPMCS]; }; static struct amd_cpu **amd_pcpu; /* * read a pmc register */ static int amd_read_pmc(int cpu, int ri, pmc_value_t *v) { enum pmc_mode mode; const struct amd_descr *pd; struct pmc *pm; pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); KASSERT(amd_pcpu[cpu], ("[amd,%d] null per-cpu, cpu %d", __LINE__, cpu)); pm = amd_pcpu[cpu]->pc_amdpmcs[ri].phw_pmc; pd = &amd_pmcdesc[ri]; KASSERT(pm != NULL, ("[amd,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri)); mode = PMC_TO_MODE(pm); PMCDBG(MDP,REA,1,"amd-read id=%d class=%d", ri, pd->pm_descr.pd_class); -#ifdef DEBUG +#ifdef HWPMC_DEBUG KASSERT(pd->pm_descr.pd_class == amd_pmc_class, ("[amd,%d] unknown PMC class (%d)", __LINE__, pd->pm_descr.pd_class)); #endif tmp = rdmsr(pd->pm_perfctr); /* RDMSR serializes */ PMCDBG(MDP,REA,2,"amd-read (pre-munge) id=%d -> %jd", ri, tmp); if (PMC_IS_SAMPLING_MODE(mode)) { /* Sign extend 48 bit value to 64 bits. */ tmp = (pmc_value_t) (((int64_t) tmp << 16) >> 16); tmp = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); } *v = tmp; PMCDBG(MDP,REA,2,"amd-read (post-munge) id=%d -> %jd", ri, *v); return 0; } /* * Write a PMC MSR. */ static int amd_write_pmc(int cpu, int ri, pmc_value_t v) { const struct amd_descr *pd; enum pmc_mode mode; struct pmc *pm; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); pm = amd_pcpu[cpu]->pc_amdpmcs[ri].phw_pmc; pd = &amd_pmcdesc[ri]; KASSERT(pm != NULL, ("[amd,%d] PMC not owned (cpu%d,pmc%d)", __LINE__, cpu, ri)); mode = PMC_TO_MODE(pm); -#ifdef DEBUG +#ifdef HWPMC_DEBUG KASSERT(pd->pm_descr.pd_class == amd_pmc_class, ("[amd,%d] unknown PMC class (%d)", __LINE__, pd->pm_descr.pd_class)); #endif /* use 2's complement of the count for sampling mode PMCs */ if (PMC_IS_SAMPLING_MODE(mode)) v = AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v); PMCDBG(MDP,WRI,1,"amd-write cpu=%d ri=%d v=%jx", cpu, ri, v); /* write the PMC value */ wrmsr(pd->pm_perfctr, v); return 0; } /* * configure hardware pmc according to the configuration recorded in * pmc 'pm'. */ static int amd_config_pmc(int cpu, int ri, struct pmc *pm) { struct pmc_hw *phw; PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; KASSERT(pm == NULL || phw->phw_pmc == NULL, ("[amd,%d] pm=%p phw->pm=%p hwpmc not unconfigured", __LINE__, pm, phw->phw_pmc)); phw->phw_pmc = pm; return 0; } /* * Retrieve a configured PMC pointer from hardware state. */ static int amd_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = amd_pcpu[cpu]->pc_amdpmcs[ri].phw_pmc; return 0; } /* * Machine dependent actions taken during the context switch in of a * thread. */ static int amd_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp, (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0); /* enable the RDPMC instruction if needed */ if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) load_cr4(rcr4() | CR4_PCE); return 0; } /* * Machine dependent actions taken during the context switch out of a * thread. */ static int amd_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; /* can be NULL */ PMCDBG(MDP,SWO,1, "pc=%p pp=%p enable-msr=%d", pc, pp, pp ? (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) == 1 : 0); /* always turn off the RDPMC instruction */ load_cr4(rcr4() & ~CR4_PCE); return 0; } /* * Check if a given allocation is feasible. */ static int amd_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { int i; uint32_t allowed_unitmask, caps, config, unitmask; enum pmc_event pe; const struct pmc_descr *pd; (void) cpu; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row index %d", __LINE__, ri)); pd = &amd_pmcdesc[ri].pm_descr; /* check class match */ if (pd->pd_class != a->pm_class) return EINVAL; caps = pm->pm_caps; PMCDBG(MDP,ALL,1,"amd-allocate ri=%d caps=0x%x", ri, caps); if ((pd->pd_caps & caps) != caps) return EPERM; pe = a->pm_ev; /* map ev to the correct event mask code */ config = allowed_unitmask = 0; for (i = 0; i < amd_event_codes_size; i++) if (amd_event_codes[i].pe_ev == pe) { config = AMD_PMC_TO_EVENTMASK(amd_event_codes[i].pe_code); allowed_unitmask = AMD_PMC_TO_UNITMASK(amd_event_codes[i].pe_mask); break; } if (i == amd_event_codes_size) return EINVAL; unitmask = a->pm_md.pm_amd.pm_amd_config & AMD_PMC_UNITMASK; if (unitmask & ~allowed_unitmask) /* disallow reserved bits */ return EINVAL; if (unitmask && (caps & PMC_CAP_QUALIFIER)) config |= unitmask; if (caps & PMC_CAP_THRESHOLD) config |= a->pm_md.pm_amd.pm_amd_config & AMD_PMC_COUNTERMASK; /* set at least one of the 'usr' or 'os' caps */ if (caps & PMC_CAP_USER) config |= AMD_PMC_USR; if (caps & PMC_CAP_SYSTEM) config |= AMD_PMC_OS; if ((caps & (PMC_CAP_USER|PMC_CAP_SYSTEM)) == 0) config |= (AMD_PMC_USR|AMD_PMC_OS); if (caps & PMC_CAP_EDGE) config |= AMD_PMC_EDGE; if (caps & PMC_CAP_INVERT) config |= AMD_PMC_INVERT; if (caps & PMC_CAP_INTERRUPT) config |= AMD_PMC_INT; pm->pm_md.pm_amd.pm_amd_evsel = config; /* save config value */ PMCDBG(MDP,ALL,2,"amd-allocate ri=%d -> config=0x%x", ri, config); return 0; } /* * Release machine dependent state associated with a PMC. This is a * no-op on this architecture. * */ /* ARGSUSED0 */ static int amd_release_pmc(int cpu, int ri, struct pmc *pmc) { -#ifdef DEBUG +#ifdef HWPMC_DEBUG const struct amd_descr *pd; #endif struct pmc_hw *phw; (void) pmc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; KASSERT(phw->phw_pmc == NULL, ("[amd,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc)); -#ifdef DEBUG +#ifdef HWPMC_DEBUG pd = &amd_pmcdesc[ri]; if (pd->pm_descr.pd_class == amd_pmc_class) KASSERT(AMD_PMC_IS_STOPPED(pd->pm_evsel), ("[amd,%d] PMC %d released while active", __LINE__, ri)); #endif return 0; } /* * start a PMC. */ static int amd_start_pmc(int cpu, int ri) { uint32_t config; struct pmc *pm; struct pmc_hw *phw; const struct amd_descr *pd; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; pm = phw->phw_pmc; pd = &amd_pmcdesc[ri]; KASSERT(pm != NULL, ("[amd,%d] starting cpu%d,pmc%d with null pmc record", __LINE__, cpu, ri)); PMCDBG(MDP,STA,1,"amd-start cpu=%d ri=%d", cpu, ri); KASSERT(AMD_PMC_IS_STOPPED(pd->pm_evsel), ("[amd,%d] pmc%d,cpu%d: Starting active PMC \"%s\"", __LINE__, ri, cpu, pd->pm_descr.pd_name)); /* turn on the PMC ENABLE bit */ config = pm->pm_md.pm_amd.pm_amd_evsel | AMD_PMC_ENABLE; PMCDBG(MDP,STA,2,"amd-start config=0x%x", config); wrmsr(pd->pm_evsel, config); return 0; } /* * Stop a PMC. */ static int amd_stop_pmc(int cpu, int ri) { struct pmc *pm; struct pmc_hw *phw; const struct amd_descr *pd; uint64_t config; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; pm = phw->phw_pmc; pd = &amd_pmcdesc[ri]; KASSERT(pm != NULL, ("[amd,%d] cpu%d,pmc%d no PMC to stop", __LINE__, cpu, ri)); KASSERT(!AMD_PMC_IS_STOPPED(pd->pm_evsel), ("[amd,%d] PMC%d, CPU%d \"%s\" already stopped", __LINE__, ri, cpu, pd->pm_descr.pd_name)); PMCDBG(MDP,STO,1,"amd-stop ri=%d", ri); /* turn off the PMC ENABLE bit */ config = pm->pm_md.pm_amd.pm_amd_evsel & ~AMD_PMC_ENABLE; wrmsr(pd->pm_evsel, config); return 0; } /* * Interrupt handler. This function needs to return '1' if the * interrupt was this CPU's PMCs or '0' otherwise. It is not allowed * to sleep or do anything a 'fast' interrupt handler is not allowed * to do. */ static int amd_intr(int cpu, struct trapframe *tf) { int i, error, retval; uint32_t config, evsel, perfctr; struct pmc *pm; struct amd_cpu *pac; pmc_value_t v; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] out of range CPU %d", __LINE__, cpu)); PMCDBG(MDP,INT,1, "cpu=%d tf=%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); retval = 0; pac = amd_pcpu[cpu]; /* * look for all PMCs that have interrupted: * - look for a running, sampling PMC which has overflowed * and which has a valid 'struct pmc' association * * If found, we call a helper to process the interrupt. * * If multiple PMCs interrupt at the same time, the AMD64 * processor appears to deliver as many NMIs as there are * outstanding PMC interrupts. So we process only one NMI * interrupt at a time. */ for (i = 0; retval == 0 && i < AMD_NPMCS; i++) { if ((pm = pac->pc_amdpmcs[i].phw_pmc) == NULL || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { continue; } if (!AMD_PMC_HAS_OVERFLOWED(i)) continue; retval = 1; /* Found an interrupting PMC. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* Stop the PMC, reload count. */ evsel = AMD_PMC_EVSEL_0 + i; perfctr = AMD_PMC_PERFCTR_0 + i; v = pm->pm_sc.pm_reloadcount; config = rdmsr(evsel); KASSERT((config & ~AMD_PMC_ENABLE) == (pm->pm_md.pm_amd.pm_amd_evsel & ~AMD_PMC_ENABLE), ("[amd,%d] config mismatch reg=0x%x pm=0x%x", __LINE__, config, pm->pm_md.pm_amd.pm_amd_evsel)); wrmsr(evsel, config & ~AMD_PMC_ENABLE); wrmsr(perfctr, AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v)); /* Restart the counter if logging succeeded. */ error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); if (error == 0) wrmsr(evsel, config | AMD_PMC_ENABLE); } atomic_add_int(retval ? &pmc_stats.pm_intr_processed : &pmc_stats.pm_intr_ignored, 1); return (retval); } /* * describe a PMC */ static int amd_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { int error; size_t copied; const struct amd_descr *pd; struct pmc_hw *phw; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] row-index %d out of range", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; pd = &amd_pmcdesc[ri]; if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name, PMC_NAME_MAX, &copied)) != 0) return error; pi->pm_class = pd->pm_descr.pd_class; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return 0; } /* * i386 specific entry points */ /* * return the MSR address of the given PMC. */ static int amd_get_msr(int ri, uint32_t *msr) { KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] ri %d out of range", __LINE__, ri)); *msr = amd_pmcdesc[ri].pm_perfctr - AMD_PMC_PERFCTR_0; return (0); } /* * processor dependent initialization. */ static int amd_pcpu_init(struct pmc_mdep *md, int cpu) { int classindex, first_ri, n; struct pmc_cpu *pc; struct amd_cpu *pac; struct pmc_hw *phw; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] insane cpu number %d", __LINE__, cpu)); PMCDBG(MDP,INI,1,"amd-init cpu=%d", cpu); amd_pcpu[cpu] = pac = malloc(sizeof(struct amd_cpu), M_PMC, M_WAITOK|M_ZERO); /* * Set the content of the hardware descriptors to a known * state and initialize pointers in the MI per-cpu descriptor. */ pc = pmc_pcpu[cpu]; #if defined(__amd64__) classindex = PMC_MDEP_CLASS_INDEX_K8; #elif defined(__i386__) classindex = md->pmd_cputype == PMC_CPU_AMD_K8 ? PMC_MDEP_CLASS_INDEX_K8 : PMC_MDEP_CLASS_INDEX_K7; #endif first_ri = md->pmd_classdep[classindex].pcd_ri; KASSERT(pc != NULL, ("[amd,%d] NULL per-cpu pointer", __LINE__)); for (n = 0, phw = pac->pc_amdpmcs; n < AMD_NPMCS; n++, phw++) { phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n); phw->phw_pmc = NULL; pc->pc_hwpmcs[n + first_ri] = phw; } return (0); } /* * processor dependent cleanup prior to the KLD * being unloaded */ static int amd_pcpu_fini(struct pmc_mdep *md, int cpu) { int classindex, first_ri, i; uint32_t evsel; struct pmc_cpu *pc; struct amd_cpu *pac; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] insane cpu number (%d)", __LINE__, cpu)); PMCDBG(MDP,INI,1,"amd-cleanup cpu=%d", cpu); /* * First, turn off all PMCs on this CPU. */ for (i = 0; i < 4; i++) { /* XXX this loop is now not needed */ evsel = rdmsr(AMD_PMC_EVSEL_0 + i); evsel &= ~AMD_PMC_ENABLE; wrmsr(AMD_PMC_EVSEL_0 + i, evsel); } /* * Next, free up allocated space. */ if ((pac = amd_pcpu[cpu]) == NULL) return (0); amd_pcpu[cpu] = NULL; -#ifdef DEBUG +#ifdef HWPMC_DEBUG for (i = 0; i < AMD_NPMCS; i++) { KASSERT(pac->pc_amdpmcs[i].phw_pmc == NULL, ("[amd,%d] CPU%d/PMC%d in use", __LINE__, cpu, i)); KASSERT(AMD_PMC_IS_STOPPED(AMD_PMC_EVSEL_0 + i), ("[amd,%d] CPU%d/PMC%d not stopped", __LINE__, cpu, i)); } #endif pc = pmc_pcpu[cpu]; KASSERT(pc != NULL, ("[amd,%d] NULL per-cpu state", __LINE__)); #if defined(__amd64__) classindex = PMC_MDEP_CLASS_INDEX_K8; #elif defined(__i386__) classindex = md->pmd_cputype == PMC_CPU_AMD_K8 ? PMC_MDEP_CLASS_INDEX_K8 : PMC_MDEP_CLASS_INDEX_K7; #endif first_ri = md->pmd_classdep[classindex].pcd_ri; /* * Reset pointers in the MI 'per-cpu' state. */ for (i = 0; i < AMD_NPMCS; i++) { pc->pc_hwpmcs[i + first_ri] = NULL; } free(pac, M_PMC); return (0); } /* * Initialize ourselves. */ struct pmc_mdep * pmc_amd_initialize(void) { int classindex, error, i, ncpus; struct pmc_classdep *pcd; enum pmc_cputype cputype; struct pmc_mdep *pmc_mdep; enum pmc_class class; char *name; /* * The presence of hardware performance counters on the AMD * Athlon, Duron or later processors, is _not_ indicated by * any of the processor feature flags set by the 'CPUID' * instruction, so we only check the 'instruction family' * field returned by CPUID for instruction family >= 6. */ name = NULL; switch (cpu_id & 0xF00) { #if defined(__i386__) case 0x600: /* Athlon(tm) processor */ classindex = PMC_MDEP_CLASS_INDEX_K7; cputype = PMC_CPU_AMD_K7; class = PMC_CLASS_K7; name = "K7"; break; #endif case 0xF00: /* Athlon64/Opteron processor */ classindex = PMC_MDEP_CLASS_INDEX_K8; cputype = PMC_CPU_AMD_K8; class = PMC_CLASS_K8; name = "K8"; break; default: (void) printf("pmc: Unknown AMD CPU.\n"); return NULL; } -#ifdef DEBUG +#ifdef HWPMC_DEBUG amd_pmc_class = class; #endif /* * Allocate space for pointers to PMC HW descriptors and for * the MDEP structure used by MI code. */ amd_pcpu = malloc(sizeof(struct amd_cpu *) * pmc_cpu_max(), M_PMC, M_WAITOK|M_ZERO); /* * These processors have two classes of PMCs: the TSC and * programmable PMCs. */ pmc_mdep = pmc_mdep_alloc(2); pmc_mdep->pmd_cputype = cputype; ncpus = pmc_cpu_max(); /* Initialize the TSC. */ error = pmc_tsc_initialize(pmc_mdep, ncpus); if (error) goto error; /* Initialize AMD K7 and K8 PMC handling. */ pcd = &pmc_mdep->pmd_classdep[classindex]; pcd->pcd_caps = AMD_PMC_CAPS; pcd->pcd_class = class; pcd->pcd_num = AMD_NPMCS; pcd->pcd_ri = pmc_mdep->pmd_npmc; pcd->pcd_width = 48; /* fill in the correct pmc name and class */ for (i = 0; i < AMD_NPMCS; i++) { (void) snprintf(amd_pmcdesc[i].pm_descr.pd_name, sizeof(amd_pmcdesc[i].pm_descr.pd_name), "%s-%d", name, i); amd_pmcdesc[i].pm_descr.pd_class = class; } pcd->pcd_allocate_pmc = amd_allocate_pmc; pcd->pcd_config_pmc = amd_config_pmc; pcd->pcd_describe = amd_describe; pcd->pcd_get_config = amd_get_config; pcd->pcd_get_msr = amd_get_msr; pcd->pcd_pcpu_fini = amd_pcpu_fini; pcd->pcd_pcpu_init = amd_pcpu_init; pcd->pcd_read_pmc = amd_read_pmc; pcd->pcd_release_pmc = amd_release_pmc; pcd->pcd_start_pmc = amd_start_pmc; pcd->pcd_stop_pmc = amd_stop_pmc; pcd->pcd_write_pmc = amd_write_pmc; pmc_mdep->pmd_pcpu_init = NULL; pmc_mdep->pmd_pcpu_fini = NULL; pmc_mdep->pmd_intr = amd_intr; pmc_mdep->pmd_switch_in = amd_switch_in; pmc_mdep->pmd_switch_out = amd_switch_out; pmc_mdep->pmd_npmc += AMD_NPMCS; PMCDBG(MDP,INI,0,"%s","amd-initialize"); return (pmc_mdep); error: if (error) { free(pmc_mdep, M_PMC); pmc_mdep = NULL; } return (NULL); } /* * Finalization code for AMD CPUs. */ void pmc_amd_finalize(struct pmc_mdep *md) { #if defined(INVARIANTS) int classindex, i, ncpus, pmcclass; #endif pmc_tsc_finalize(md); KASSERT(amd_pcpu != NULL, ("[amd,%d] NULL per-cpu array pointer", __LINE__)); #if defined(INVARIANTS) switch (md->pmd_cputype) { #if defined(__i386__) case PMC_CPU_AMD_K7: classindex = PMC_MDEP_CLASS_INDEX_K7; pmcclass = PMC_CLASS_K7; break; #endif default: classindex = PMC_MDEP_CLASS_INDEX_K8; pmcclass = PMC_CLASS_K8; } KASSERT(md->pmd_classdep[classindex].pcd_class == pmcclass, ("[amd,%d] pmc class mismatch", __LINE__)); ncpus = pmc_cpu_max(); for (i = 0; i < ncpus; i++) KASSERT(amd_pcpu[i] == NULL, ("[amd,%d] non-null pcpu", __LINE__)); #endif free(amd_pcpu, M_PMC); amd_pcpu = NULL; } Index: head/sys/dev/hwpmc/hwpmc_logging.c =================================================================== --- head/sys/dev/hwpmc/hwpmc_logging.c (revision 282640) +++ head/sys/dev/hwpmc/hwpmc_logging.c (revision 282641) @@ -1,1073 +1,1073 @@ /*- * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Logging code for hwpmc(4) */ #include __FBSDID("$FreeBSD$"); #include #if (__FreeBSD_version >= 1100000) #include #else #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Sysctl tunables */ SYSCTL_DECL(_kern_hwpmc); /* * kern.hwpmc.logbuffersize -- size of the per-cpu owner buffers. */ static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_RDTUN, &pmclog_buffer_size, 0, "size of log buffers in kilobytes"); /* * kern.hwpmc.nbuffer -- number of global log buffers */ static int pmc_nlogbuffers = PMC_NLOGBUFFERS; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_RDTUN, &pmc_nlogbuffers, 0, "number of global log buffers"); /* * Global log buffer list and associated spin lock. */ TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist = TAILQ_HEAD_INITIALIZER(pmc_bufferlist); static struct mtx pmc_bufferlist_mtx; /* spin lock */ static struct mtx pmc_kthread_mtx; /* sleep lock */ #define PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do { \ const int __roundup = roundup(sizeof(*D), \ sizeof(uint32_t)); \ (D)->plb_fence = ((char *) (D)) + \ 1024*pmclog_buffer_size; \ (D)->plb_base = (D)->plb_ptr = ((char *) (D)) + \ __roundup; \ } while (0) /* * Log file record constructors. */ #define _PMCLOG_TO_HEADER(T,L) \ ((PMCLOG_HEADER_MAGIC << 24) | \ (PMCLOG_TYPE_ ## T << 16) | \ ((L) & 0xFFFF)) /* reserve LEN bytes of space and initialize the entry header */ #define _PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do { \ uint32_t *_le; \ int _len = roundup((LEN), sizeof(uint32_t)); \ if ((_le = pmclog_reserve((PO), _len)) == NULL) { \ ACTION; \ } \ *_le = _PMCLOG_TO_HEADER(TYPE,_len); \ _le += 3 /* skip over timestamp */ #define PMCLOG_RESERVE(P,T,L) _PMCLOG_RESERVE(P,T,L,return) #define PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L, \ error=ENOMEM;goto error) #define PMCLOG_EMIT32(V) do { *_le++ = (V); } while (0) #define PMCLOG_EMIT64(V) do { \ *_le++ = (uint32_t) ((V) & 0xFFFFFFFF); \ *_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF); \ } while (0) /* Emit a string. Caution: does NOT update _le, so needs to be last */ #define PMCLOG_EMITSTRING(S,L) do { bcopy((S), _le, (L)); } while (0) #define PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0) #define PMCLOG_DESPATCH(PO) \ pmclog_release((PO)); \ } while (0) /* * Assertions about the log file format. */ CTASSERT(sizeof(struct pmclog_callchain) == 6*4 + PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_closelog) == 3*4); CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4); CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX + 4*4 + sizeof(uintfptr_t)); CTASSERT(offsetof(struct pmclog_map_in,pl_pathname) == 4*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_map_out) == 4*4 + 2*sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_pcsample) == 6*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4); CTASSERT(sizeof(struct pmclog_pmcattach) == 5*4 + PATH_MAX); CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 5*4); CTASSERT(sizeof(struct pmclog_pmcdetach) == 5*4); CTASSERT(sizeof(struct pmclog_proccsw) == 5*4 + 8); CTASSERT(sizeof(struct pmclog_procexec) == 5*4 + PATH_MAX + sizeof(uintfptr_t)); CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 5*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_procexit) == 5*4 + 8); CTASSERT(sizeof(struct pmclog_procfork) == 5*4); CTASSERT(sizeof(struct pmclog_sysexit) == 4*4); CTASSERT(sizeof(struct pmclog_userdata) == 4*4); /* * Log buffer structure */ struct pmclog_buffer { TAILQ_ENTRY(pmclog_buffer) plb_next; char *plb_base; char *plb_ptr; char *plb_fence; }; /* * Prototypes */ static int pmclog_get_buffer(struct pmc_owner *po); static void pmclog_loop(void *arg); static void pmclog_release(struct pmc_owner *po); static uint32_t *pmclog_reserve(struct pmc_owner *po, int length); static void pmclog_schedule_io(struct pmc_owner *po); static void pmclog_stop_kthread(struct pmc_owner *po); /* * Helper functions */ /* * Get a log buffer */ static int pmclog_get_buffer(struct pmc_owner *po) { struct pmclog_buffer *plb; mtx_assert(&po->po_mtx, MA_OWNED); KASSERT(po->po_curbuf == NULL, ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po)); mtx_lock_spin(&pmc_bufferlist_mtx); if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next); mtx_unlock_spin(&pmc_bufferlist_mtx); PMCDBG(LOG,GTB,1, "po=%p plb=%p", po, plb); -#ifdef DEBUG +#ifdef HWPMC_DEBUG if (plb) KASSERT(plb->plb_ptr == plb->plb_base && plb->plb_base < plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p " "base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); #endif po->po_curbuf = plb; /* update stats */ atomic_add_int(&pmc_stats.pm_buffer_requests, 1); if (plb == NULL) atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1); return (plb ? 0 : ENOMEM); } /* * Log handler loop. * * This function is executed by each pmc owner's helper thread. */ static void pmclog_loop(void *arg) { int error; struct pmc_owner *po; struct pmclog_buffer *lb; struct proc *p; struct ucred *ownercred; struct ucred *mycred; struct thread *td; struct uio auio; struct iovec aiov; size_t nbytes; po = (struct pmc_owner *) arg; p = po->po_owner; td = curthread; mycred = td->td_ucred; PROC_LOCK(p); ownercred = crhold(p->p_ucred); PROC_UNLOCK(p); PMCDBG(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread); KASSERT(po->po_kthread == curthread->td_proc, ("[pmclog,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__, po, po->po_kthread, curthread->td_proc)); lb = NULL; /* * Loop waiting for I/O requests to be added to the owner * struct's queue. The loop is exited when the log file * is deconfigured. */ mtx_lock(&pmc_kthread_mtx); for (;;) { /* check if we've been asked to exit */ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) break; if (lb == NULL) { /* look for a fresh buffer to write */ mtx_lock_spin(&po->po_mtx); if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) { mtx_unlock_spin(&po->po_mtx); /* No more buffers and shutdown required. */ if (po->po_flags & PMC_PO_SHUTDOWN) { mtx_unlock(&pmc_kthread_mtx); /* * Close the file to get PMCLOG_EOF * error in pmclog(3). */ fo_close(po->po_file, curthread); mtx_lock(&pmc_kthread_mtx); break; } (void) msleep(po, &pmc_kthread_mtx, PWAIT, "pmcloop", 0); continue; } TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); mtx_unlock_spin(&po->po_mtx); } mtx_unlock(&pmc_kthread_mtx); /* process the request */ PMCDBG(LOG,WRI,2, "po=%p base=%p ptr=%p", po, lb->plb_base, lb->plb_ptr); /* change our thread's credentials before issuing the I/O */ aiov.iov_base = lb->plb_base; aiov.iov_len = nbytes = lb->plb_ptr - lb->plb_base; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; /* switch thread credentials -- see kern_ktrace.c */ td->td_ucred = ownercred; error = fo_write(po->po_file, &auio, ownercred, 0, td); td->td_ucred = mycred; if (error) { /* XXX some errors are recoverable */ /* send a SIGIO to the owner and exit */ PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); mtx_lock(&pmc_kthread_mtx); po->po_error = error; /* save for flush log */ PMCDBG(LOG,WRI,2, "po=%p error=%d", po, error); break; } mtx_lock(&pmc_kthread_mtx); /* put the used buffer back into the global pool */ PMCLOG_INIT_BUFFER_DESCRIPTOR(lb); mtx_lock_spin(&pmc_bufferlist_mtx); TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next); mtx_unlock_spin(&pmc_bufferlist_mtx); lb = NULL; } wakeup_one(po->po_kthread); po->po_kthread = NULL; mtx_unlock(&pmc_kthread_mtx); /* return the current I/O buffer to the global pool */ if (lb) { PMCLOG_INIT_BUFFER_DESCRIPTOR(lb); mtx_lock_spin(&pmc_bufferlist_mtx); TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next); mtx_unlock_spin(&pmc_bufferlist_mtx); } /* * Exit this thread, signalling the waiter */ crfree(ownercred); kproc_exit(0); } /* * Release and log entry and schedule an I/O if needed. */ static void pmclog_release(struct pmc_owner *po) { KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base, ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base)); KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence, ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence)); /* schedule an I/O if we've filled a buffer */ if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence) pmclog_schedule_io(po); mtx_unlock_spin(&po->po_mtx); PMCDBG(LOG,REL,1, "po=%p", po); } /* * Attempt to reserve 'length' bytes of space in an owner's log * buffer. The function returns a pointer to 'length' bytes of space * if there was enough space or returns NULL if no space was * available. Non-null returns do so with the po mutex locked. The * caller must invoke pmclog_release() on the pmc owner structure * when done. */ static uint32_t * pmclog_reserve(struct pmc_owner *po, int length) { uintptr_t newptr, oldptr; uint32_t *lh; struct timespec ts; PMCDBG(LOG,ALL,1, "po=%p len=%d", po, length); KASSERT(length % sizeof(uint32_t) == 0, ("[pmclog,%d] length not a multiple of word size", __LINE__)); mtx_lock_spin(&po->po_mtx); /* No more data when shutdown in progress. */ if (po->po_flags & PMC_PO_SHUTDOWN) { mtx_unlock_spin(&po->po_mtx); return (NULL); } if (po->po_curbuf == NULL) if (pmclog_get_buffer(po) != 0) { mtx_unlock_spin(&po->po_mtx); return (NULL); } KASSERT(po->po_curbuf != NULL, ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base && po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base, po->po_curbuf->plb_fence)); oldptr = (uintptr_t) po->po_curbuf->plb_ptr; newptr = oldptr + length; KASSERT(oldptr != (uintptr_t) NULL, ("[pmclog,%d] po=%p Null log buffer pointer", __LINE__, po)); /* * If we have space in the current buffer, return a pointer to * available space with the PO structure locked. */ if (newptr <= (uintptr_t) po->po_curbuf->plb_fence) { po->po_curbuf->plb_ptr = (char *) newptr; goto done; } /* * Otherwise, schedule the current buffer for output and get a * fresh buffer. */ pmclog_schedule_io(po); if (pmclog_get_buffer(po) != 0) { mtx_unlock_spin(&po->po_mtx); return (NULL); } KASSERT(po->po_curbuf != NULL, ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); KASSERT(po->po_curbuf->plb_ptr != NULL, ("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__)); KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base && po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base, po->po_curbuf->plb_fence)); oldptr = (uintptr_t) po->po_curbuf->plb_ptr; done: lh = (uint32_t *) oldptr; lh++; /* skip header */ getnanotime(&ts); /* fill in the timestamp */ *lh++ = ts.tv_sec & 0xFFFFFFFF; *lh++ = ts.tv_nsec & 0xFFFFFFF; return ((uint32_t *) oldptr); } /* * Schedule an I/O. * * Transfer the current buffer to the helper kthread. */ static void pmclog_schedule_io(struct pmc_owner *po) { KASSERT(po->po_curbuf != NULL, ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po)); KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base, ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base)); KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence, ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence)); PMCDBG(LOG,SIO, 1, "po=%p", po); mtx_assert(&po->po_mtx, MA_OWNED); /* * Add the current buffer to the tail of the buffer list and * wakeup the helper. */ TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next); po->po_curbuf = NULL; wakeup_one(po); } /* * Stop the helper kthread. */ static void pmclog_stop_kthread(struct pmc_owner *po) { /* * Close the file to force the thread out of fo_write, * unset flag, wakeup the helper thread, * wait for it to exit */ if (po->po_file != NULL) fo_close(po->po_file, curthread); mtx_lock(&pmc_kthread_mtx); po->po_flags &= ~PMC_PO_OWNS_LOGFILE; wakeup_one(po); if (po->po_kthread) msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmckstp", 0); mtx_unlock(&pmc_kthread_mtx); } /* * Public functions */ /* * Configure a log file for pmc owner 'po'. * * Parameter 'logfd' is a file handle referencing an open file in the * owner process. This file needs to have been opened for writing. */ int pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd) { int error; struct proc *p; cap_rights_t rights; /* * As long as it is possible to get a LOR between pmc_sx lock and * proctree/allproc sx locks used for adding a new process, assure * the former is not held here. */ sx_assert(&pmc_sx, SA_UNLOCKED); PMCDBG(LOG,CFG,1, "config po=%p logfd=%d", po, logfd); p = po->po_owner; /* return EBUSY if a log file was already present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) return (EBUSY); KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread (%p) already present", __LINE__, po, po->po_kthread)); KASSERT(po->po_file == NULL, ("[pmclog,%d] po=%p file (%p) already present", __LINE__, po, po->po_file)); /* get a reference to the file state */ error = fget_write(curthread, logfd, cap_rights_init(&rights, CAP_WRITE), &po->po_file); if (error) goto error; /* mark process as owning a log file */ po->po_flags |= PMC_PO_OWNS_LOGFILE; error = kproc_create(pmclog_loop, po, &po->po_kthread, RFHIGHPID, 0, "hwpmc: proc(%d)", p->p_pid); if (error) goto error; /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); /* create a log initialization entry */ PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE, sizeof(struct pmclog_initialize)); PMCLOG_EMIT32(PMC_VERSION); PMCLOG_EMIT32(md->pmd_cputype); PMCLOG_DESPATCH(po); return (0); error: /* shutdown the thread */ if (po->po_kthread) pmclog_stop_kthread(po); KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not " "stopped", __LINE__, po)); if (po->po_file) (void) fdrop(po->po_file, curthread); po->po_file = NULL; /* clear file and error state */ po->po_error = 0; return (error); } /* * De-configure a log file. This will throw away any buffers queued * for this owner process. */ int pmclog_deconfigure_log(struct pmc_owner *po) { int error; struct pmclog_buffer *lb; PMCDBG(LOG,CFG,1, "de-config po=%p", po); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EINVAL); KASSERT(po->po_sscount == 0, ("[pmclog,%d] po=%p still owning SS PMCs", __LINE__, po)); KASSERT(po->po_file != NULL, ("[pmclog,%d] po=%p no log file", __LINE__, po)); /* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */ pmclog_stop_kthread(po); KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not stopped", __LINE__, po)); /* return all queued log buffers to the global pool */ while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) { TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); PMCLOG_INIT_BUFFER_DESCRIPTOR(lb); mtx_lock_spin(&pmc_bufferlist_mtx); TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next); mtx_unlock_spin(&pmc_bufferlist_mtx); } /* return the 'current' buffer to the global pool */ if ((lb = po->po_curbuf) != NULL) { PMCLOG_INIT_BUFFER_DESCRIPTOR(lb); mtx_lock_spin(&pmc_bufferlist_mtx); TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next); mtx_unlock_spin(&pmc_bufferlist_mtx); } /* drop a reference to the fd */ error = fdrop(po->po_file, curthread); po->po_file = NULL; po->po_error = 0; return (error); } /* * Flush a process' log buffer. */ int pmclog_flush(struct pmc_owner *po) { int error; struct pmclog_buffer *lb; PMCDBG(LOG,FLS,1, "po=%p", po); /* * If there is a pending error recorded by the logger thread, * return that. */ if (po->po_error) return (po->po_error); error = 0; /* * Check that we do have an active log file. */ mtx_lock(&pmc_kthread_mtx); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; goto error; } /* * Schedule the current buffer if any and not empty. */ mtx_lock_spin(&po->po_mtx); lb = po->po_curbuf; if (lb && lb->plb_ptr != lb->plb_base) { pmclog_schedule_io(po); } else error = ENOBUFS; mtx_unlock_spin(&po->po_mtx); error: mtx_unlock(&pmc_kthread_mtx); return (error); } int pmclog_close(struct pmc_owner *po) { PMCDBG(LOG,CLO,1, "po=%p", po); mtx_lock(&pmc_kthread_mtx); /* * Schedule the current buffer. */ mtx_lock_spin(&po->po_mtx); if (po->po_curbuf) pmclog_schedule_io(po); else wakeup_one(po); mtx_unlock_spin(&po->po_mtx); /* * Initiate shutdown: no new data queued, * thread will close file on last block. */ po->po_flags |= PMC_PO_SHUTDOWN; mtx_unlock(&pmc_kthread_mtx); return (0); } void pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps) { int n, recordlen; uint32_t flags; struct pmc_owner *po; PMCDBG(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid, ps->ps_nsamples); recordlen = offsetof(struct pmclog_callchain, pl_pc) + ps->ps_nsamples * sizeof(uintfptr_t); po = pm->pm_owner; flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags); PMCLOG_RESERVE(po, CALLCHAIN, recordlen); PMCLOG_EMIT32(ps->ps_pid); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(flags); for (n = 0; n < ps->ps_nsamples; n++) PMCLOG_EMITADDR(ps->ps_pc[n]); PMCLOG_DESPATCH(po); } void pmclog_process_closelog(struct pmc_owner *po) { PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog)); PMCLOG_DESPATCH(po); } void pmclog_process_dropnotify(struct pmc_owner *po) { PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify)); PMCLOG_DESPATCH(po); } void pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start, const char *path) { int pathlen, recordlen; KASSERT(path != NULL, ("[pmclog,%d] map-in, null path", __LINE__)); pathlen = strlen(path) + 1; /* #bytes for path name */ recordlen = offsetof(struct pmclog_map_in, pl_pathname) + pathlen; PMCLOG_RESERVE(po, MAP_IN, recordlen); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(start); PMCLOG_EMITSTRING(path,pathlen); PMCLOG_DESPATCH(po); } void pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start, uintfptr_t end) { KASSERT(start <= end, ("[pmclog,%d] start > end", __LINE__)); PMCLOG_RESERVE(po, MAP_OUT, sizeof(struct pmclog_map_out)); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(start); PMCLOG_EMITADDR(end); PMCLOG_DESPATCH(po); } void pmclog_process_pmcallocate(struct pmc *pm) { struct pmc_owner *po; struct pmc_soft *ps; po = pm->pm_owner; PMCDBG(LOG,ALL,1, "pm=%p", pm); if (PMC_TO_CLASS(pm) == PMC_CLASS_SOFT) { PMCLOG_RESERVE(po, PMCALLOCATEDYN, sizeof(struct pmclog_pmcallocatedyn)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pm->pm_event); PMCLOG_EMIT32(pm->pm_flags); ps = pmc_soft_ev_acquire(pm->pm_event); if (ps != NULL) PMCLOG_EMITSTRING(ps->ps_ev.pm_ev_name,PMC_NAME_MAX); else PMCLOG_EMITNULLSTRING(PMC_NAME_MAX); pmc_soft_ev_release(ps); PMCLOG_DESPATCH(po); } else { PMCLOG_RESERVE(po, PMCALLOCATE, sizeof(struct pmclog_pmcallocate)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pm->pm_event); PMCLOG_EMIT32(pm->pm_flags); PMCLOG_DESPATCH(po); } } void pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path) { int pathlen, recordlen; struct pmc_owner *po; PMCDBG(LOG,ATT,1,"pm=%p pid=%d", pm, pid); po = pm->pm_owner; pathlen = strlen(path) + 1; /* #bytes for the string */ recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen; PMCLOG_RESERVE(po, PMCATTACH, recordlen); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pid); PMCLOG_EMITSTRING(path, pathlen); PMCLOG_DESPATCH(po); } void pmclog_process_pmcdetach(struct pmc *pm, pid_t pid) { struct pmc_owner *po; PMCDBG(LOG,ATT,1,"!pm=%p pid=%d", pm, pid); po = pm->pm_owner; PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pid); PMCLOG_DESPATCH(po); } /* * Log a context switch event to the log file. */ void pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v) { struct pmc_owner *po; KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW, ("[pmclog,%d] log-process-csw called gratuitously", __LINE__)); PMCDBG(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid, v); po = pm->pm_owner; PMCLOG_RESERVE(po, PROCCSW, sizeof(struct pmclog_proccsw)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT64(v); PMCLOG_EMIT32(pp->pp_proc->p_pid); PMCLOG_DESPATCH(po); } void pmclog_process_procexec(struct pmc_owner *po, pmc_id_t pmid, pid_t pid, uintfptr_t startaddr, char *path) { int pathlen, recordlen; PMCDBG(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path); pathlen = strlen(path) + 1; /* #bytes for the path */ recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen; PMCLOG_RESERVE(po, PROCEXEC, recordlen); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(startaddr); PMCLOG_EMIT32(pmid); PMCLOG_EMITSTRING(path,pathlen); PMCLOG_DESPATCH(po); } /* * Log a process exit event (and accumulated pmc value) to the log file. */ void pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_owner *po; ri = PMC_TO_ROWINDEX(pm); PMCDBG(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid, pp->pp_pmcs[ri].pp_pmcval); po = pm->pm_owner; PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval); PMCLOG_EMIT32(pp->pp_proc->p_pid); PMCLOG_DESPATCH(po); } /* * Log a fork event. */ void pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid) { PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork)); PMCLOG_EMIT32(oldpid); PMCLOG_EMIT32(newpid); PMCLOG_DESPATCH(po); } /* * Log a process exit event of the form suitable for system-wide PMCs. */ void pmclog_process_sysexit(struct pmc_owner *po, pid_t pid) { PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit)); PMCLOG_EMIT32(pid); PMCLOG_DESPATCH(po); } /* * Write a user log entry. */ int pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl) { int error; PMCDBG(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata); error = 0; PMCLOG_RESERVE_WITH_ERROR(po, USERDATA, sizeof(struct pmclog_userdata)); PMCLOG_EMIT32(wl->pm_userdata); PMCLOG_DESPATCH(po); error: return (error); } /* * Initialization. * * Create a pool of log buffers and initialize mutexes. */ void pmclog_initialize() { int n; struct pmclog_buffer *plb; if (pmclog_buffer_size <= 0) { (void) printf("hwpmc: tunable logbuffersize=%d must be " "greater than zero.\n", pmclog_buffer_size); pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; } if (pmc_nlogbuffers <= 0) { (void) printf("hwpmc: tunable nlogbuffers=%d must be greater " "than zero.\n", pmc_nlogbuffers); pmc_nlogbuffers = PMC_NLOGBUFFERS; } /* create global pool of log buffers */ for (n = 0; n < pmc_nlogbuffers; n++) { plb = malloc(1024 * pmclog_buffer_size, M_PMC, M_WAITOK|M_ZERO); PMCLOG_INIT_BUFFER_DESCRIPTOR(plb); TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next); } mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc-leaf", MTX_SPIN); mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF); } /* * Shutdown logging. * * Destroy mutexes and release memory back the to free pool. */ void pmclog_shutdown() { struct pmclog_buffer *plb; mtx_destroy(&pmc_kthread_mtx); mtx_destroy(&pmc_bufferlist_mtx); while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) { TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next); free(plb, M_PMC); } } Index: head/sys/dev/hwpmc/hwpmc_mod.c =================================================================== --- head/sys/dev/hwpmc/hwpmc_mod.c (revision 282640) +++ head/sys/dev/hwpmc/hwpmc_mod.c (revision 282641) @@ -1,5117 +1,5117 @@ /*- * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needs to be after */ #include #include #include #include #include #include #include #include "hwpmc_soft.h" /* * Types */ enum pmc_flags { PMC_FLAG_NONE = 0x00, /* do nothing */ PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ }; /* * The offset in sysent where the syscall is allocated. */ static int pmc_syscall_num = NO_SYSCALL; struct pmc_cpu **pmc_pcpu; /* per-cpu state */ pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] struct mtx_pool *pmc_mtxpool; static int *pmc_pmcdisp; /* PMC row dispositions */ #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) #define PMC_MARK_ROW_FREE(R) do { \ pmc_pmcdisp[(R)] = 0; \ } while (0) #define PMC_MARK_ROW_STANDALONE(R) do { \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ ("[pmc,%d] row disposition error", __LINE__)); \ } while (0) #define PMC_UNMARK_ROW_STANDALONE(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) #define PMC_MARK_ROW_THREAD(R) do { \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ } while (0) #define PMC_UNMARK_ROW_THREAD(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) /* various event handlers */ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, pmc_kld_unload_tag; /* Module statistics */ struct pmc_op_getdriverstats pmc_stats; /* Machine/processor dependent operations */ static struct pmc_mdep *md; /* * Hash tables mapping owner processes and target threads to PMCs. */ struct mtx pmc_processhash_mtx; /* spin mutex */ static u_long pmc_processhashmask; static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; /* * Hash table of PMC owner descriptors. This table is protected by * the shared PMC "sx" lock. */ static u_long pmc_ownerhashmask; static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; /* * List of PMC owners with system-wide sampling PMCs. */ static LIST_HEAD(, pmc_owner) pmc_ss_owners; /* * A map of row indices to classdep structures. */ static struct pmc_classdep **pmc_rowindex_to_classdep; /* * Prototypes */ -#ifdef DEBUG +#ifdef HWPMC_DEBUG static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); static int pmc_attach_process(struct proc *p, struct pmc *pm); static struct pmc *pmc_allocate_pmc_descriptor(void); static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags); static void pmc_destroy_owner_descriptor(struct pmc_owner *po); static void pmc_destroy_pmc_descriptor(struct pmc *pm); static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmc); static struct pmc_process *pmc_find_process_descriptor(struct proc *p, uint32_t mode); static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); static void pmc_log_all_process_mappings(struct pmc_owner *po); static void pmc_log_kernel_mappings(struct pmc *pm); static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); static void pmc_process_exit(void *arg, struct proc *p); static void pmc_process_fork(void *arg, struct proc *p1, struct proc *p2, int n); static void pmc_process_samples(int cpu, int soft); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_remove_owner(struct pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static void pmc_restore_cpu_binding(struct pmc_binding *pb); static void pmc_save_cpu_binding(struct pmc_binding *pb); static void pmc_select_cpu(int cpu); static int pmc_start(struct pmc *pm); static int pmc_stop(struct pmc *pm); static int pmc_syscall_handler(struct thread *td, void *syscall_args); static void pmc_unlink_target_process(struct pmc *pmc, struct pmc_process *pp); static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); /* * Kernel tunables and sysctl(8) interface. */ SYSCTL_DECL(_kern_hwpmc); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, &pmc_callchaindepth, 0, "depth of call chain records"); -#ifdef DEBUG +#ifdef HWPMC_DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr)); SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); #endif /* * kern.hwpmc.hashrows -- determines the number of rows in the * of the hash table used to look up threads */ static int pmc_hashsize = PMC_HASH_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN, &pmc_hashsize, 0, "rows in hash tables"); /* * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN, &pmc_nsamples, 0, "number of PC samples per CPU"); /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. */ static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN, &pmc_mtxpool_size, 0, "size of spin mutex pool"); /* * security.bsd.unprivileged_syspmcs -- allow non-root processes to * allocate system-wide PMCs. * * Allowing unprivileged processes to allocate system PMCs is convenient * if system-wide measurements need to be taken concurrently with other * per-process measurements. This feature is turned off by default. */ static int pmc_unprivileged_syspmcs = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN, &pmc_unprivileged_syspmcs, 0, "allow unprivileged process to allocate system PMCs"); /* * Hash function. Discard the lower 2 bits of the pointer since * these are always zero for our uses. The hash multiplier is * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). */ #if LONG_BIT == 64 #define _PMC_HM 11400714819323198486u #elif LONG_BIT == 32 #define _PMC_HM 2654435769u #else #error Must know the size of 'long' to compile #endif #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) /* * Syscall structures */ /* The `sysent' for the new syscall */ static struct sysent pmc_sysent = { 2, /* sy_narg */ pmc_syscall_handler /* sy_call */ }; static struct syscall_module_data pmc_syscall_mod = { load, NULL, &pmc_syscall_num, &pmc_sysent, #if (__FreeBSD_version >= 1100000) { 0, NULL }, SY_THR_STATIC_KLD, #else { 0, NULL } #endif }; static moduledata_t pmc_mod = { PMC_MODULE_NAME, syscall_module_handler, &pmc_syscall_mod }; DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_VERSION(pmc, PMC_VERSION); -#ifdef DEBUG +#ifdef HWPMC_DEBUG enum pmc_dbgparse_state { PMCDS_WS, /* in whitespace */ PMCDS_MAJOR, /* seen a major keyword */ PMCDS_MINOR }; static int pmc_debugflags_parse(char *newstr, char *fence) { char c, *p, *q; struct pmc_debugflags *tmpflags; int error, found, *newbits, tmp; size_t kwlen; tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO); p = newstr; error = 0; for (; p < fence && (c = *p); p++) { /* skip white space */ if (c == ' ' || c == '\t') continue; /* look for a keyword followed by "=" */ for (q = p; p < fence && (c = *p) && c != '='; p++) ; if (c != '=') { error = EINVAL; goto done; } kwlen = p - q; newbits = NULL; /* lookup flag group name */ #define DBG_SET_FLAG_MAJ(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ newbits = &tmpflags->pdb_ ## F; DBG_SET_FLAG_MAJ("cpu", CPU); DBG_SET_FLAG_MAJ("csw", CSW); DBG_SET_FLAG_MAJ("logging", LOG); DBG_SET_FLAG_MAJ("module", MOD); DBG_SET_FLAG_MAJ("md", MDP); DBG_SET_FLAG_MAJ("owner", OWN); DBG_SET_FLAG_MAJ("pmc", PMC); DBG_SET_FLAG_MAJ("process", PRC); DBG_SET_FLAG_MAJ("sampling", SAM); if (newbits == NULL) { error = EINVAL; goto done; } p++; /* skip the '=' */ /* Now parse the individual flags */ tmp = 0; newflag: for (q = p; p < fence && (c = *p); p++) if (c == ' ' || c == '\t' || c == ',') break; /* p == fence or c == ws or c == "," or c == 0 */ if ((kwlen = p - q) == 0) { *newbits = tmp; continue; } found = 0; #define DBG_SET_FLAG_MIN(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) /* a '*' denotes all possible flags in the group */ if (kwlen == 1 && *q == '*') tmp = found = ~0; /* look for individual flag names */ DBG_SET_FLAG_MIN("allocaterow", ALR); DBG_SET_FLAG_MIN("allocate", ALL); DBG_SET_FLAG_MIN("attach", ATT); DBG_SET_FLAG_MIN("bind", BND); DBG_SET_FLAG_MIN("config", CFG); DBG_SET_FLAG_MIN("exec", EXC); DBG_SET_FLAG_MIN("exit", EXT); DBG_SET_FLAG_MIN("find", FND); DBG_SET_FLAG_MIN("flush", FLS); DBG_SET_FLAG_MIN("fork", FRK); DBG_SET_FLAG_MIN("getbuf", GTB); DBG_SET_FLAG_MIN("hook", PMH); DBG_SET_FLAG_MIN("init", INI); DBG_SET_FLAG_MIN("intr", INT); DBG_SET_FLAG_MIN("linktarget", TLK); DBG_SET_FLAG_MIN("mayberemove", OMR); DBG_SET_FLAG_MIN("ops", OPS); DBG_SET_FLAG_MIN("read", REA); DBG_SET_FLAG_MIN("register", REG); DBG_SET_FLAG_MIN("release", REL); DBG_SET_FLAG_MIN("remove", ORM); DBG_SET_FLAG_MIN("sample", SAM); DBG_SET_FLAG_MIN("scheduleio", SIO); DBG_SET_FLAG_MIN("select", SEL); DBG_SET_FLAG_MIN("signal", SIG); DBG_SET_FLAG_MIN("swi", SWI); DBG_SET_FLAG_MIN("swo", SWO); DBG_SET_FLAG_MIN("start", STA); DBG_SET_FLAG_MIN("stop", STO); DBG_SET_FLAG_MIN("syscall", PMS); DBG_SET_FLAG_MIN("unlinktarget", TUL); DBG_SET_FLAG_MIN("write", WRI); if (found == 0) { /* unrecognized flag name */ error = EINVAL; goto done; } if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ *newbits = tmp; continue; } p++; goto newflag; } /* save the new flag set */ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); done: free(tmpflags, M_PMC); return error; } static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) { char *fence, *newstr; int error; unsigned int n; (void) arg1; (void) arg2; /* unused parameters */ n = sizeof(pmc_debugstr); newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO); (void) strlcpy(newstr, pmc_debugstr, n); error = sysctl_handle_string(oidp, newstr, n, req); /* if there is a new string, parse and copy it */ if (error == 0 && req->newptr != NULL) { fence = newstr + (n < req->newlen ? n : req->newlen + 1); if ((error = pmc_debugflags_parse(newstr, fence)) == 0) (void) strlcpy(pmc_debugstr, newstr, sizeof(pmc_debugstr)); } free(newstr, M_PMC); return error; } #endif /* * Map a row index to a classdep structure and return the adjusted row * index for the PMC class index. */ static struct pmc_classdep * pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri) { struct pmc_classdep *pcd; (void) md; KASSERT(ri >= 0 && ri < md->pmd_npmc, ("[pmc,%d] illegal row-index %d", __LINE__, ri)); pcd = pmc_rowindex_to_classdep[ri]; KASSERT(pcd != NULL, ("[pmc,%d] ri %d null pcd", __LINE__, ri)); *adjri = ri - pcd->pcd_ri; KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); return (pcd); } /* * Concurrency Control * * The driver manages the following data structures: * * - target process descriptors, one per target process * - owner process descriptors (and attached lists), one per owner process * - lookup hash tables for owner and target processes * - PMC descriptors (and attached lists) * - per-cpu hardware state * - the 'hook' variable through which the kernel calls into * this module * - the machine hardware state (managed by the MD layer) * * These data structures are accessed from: * * - thread context-switch code * - interrupt handlers (possibly on multiple cpus) * - kernel threads on multiple cpus running on behalf of user * processes doing system calls * - this driver's private kernel threads * * = Locks and Locking strategy = * * The driver uses four locking strategies for its operation: * * - The global SX lock "pmc_sx" is used to protect internal * data structures. * * Calls into the module by syscall() start with this lock being * held in exclusive mode. Depending on the requested operation, * the lock may be downgraded to 'shared' mode to allow more * concurrent readers into the module. Calls into the module from * other parts of the kernel acquire the lock in shared mode. * * This SX lock is held in exclusive mode for any operations that * modify the linkages between the driver's internal data structures. * * The 'pmc_hook' function pointer is also protected by this lock. * It is only examined with the sx lock held in exclusive mode. The * kernel module is allowed to be unloaded only with the sx lock held * in exclusive mode. In normal syscall handling, after acquiring the * pmc_sx lock we first check that 'pmc_hook' is non-null before * proceeding. This prevents races between the thread unloading the module * and other threads seeking to use the module. * * - Lookups of target process structures and owner process structures * cannot use the global "pmc_sx" SX lock because these lookups need * to happen during context switches and in other critical sections * where sleeping is not allowed. We protect these lookup tables * with their own private spin-mutexes, "pmc_processhash_mtx" and * "pmc_ownerhash_mtx". * * - Interrupt handlers work in a lock free manner. At interrupt * time, handlers look at the PMC pointer (phw->phw_pmc) configured * when the PMC was started. If this pointer is NULL, the interrupt * is ignored after updating driver statistics. We ensure that this * pointer is set (using an atomic operation if necessary) before the * PMC hardware is started. Conversely, this pointer is unset atomically * only after the PMC hardware is stopped. * * We ensure that everything needed for the operation of an * interrupt handler is available without it needing to acquire any * locks. We also ensure that a PMC's software state is destroyed only * after the PMC is taken off hardware (on all CPUs). * * - Context-switch handling with process-private PMCs needs more * care. * * A given process may be the target of multiple PMCs. For example, * PMCATTACH and PMCDETACH may be requested by a process on one CPU * while the target process is running on another. A PMC could also * be getting released because its owner is exiting. We tackle * these situations in the following manner: * * - each target process structure 'pmc_process' has an array * of 'struct pmc *' pointers, one for each hardware PMC. * * - At context switch IN time, each "target" PMC in RUNNING state * gets started on hardware and a pointer to each PMC is copied into * the per-cpu phw array. The 'runcount' for the PMC is * incremented. * * - At context switch OUT time, all process-virtual PMCs are stopped * on hardware. The saved value is added to the PMCs value field * only if the PMC is in a non-deleted state (the PMCs state could * have changed during the current time slice). * * Note that since in-between a switch IN on a processor and a switch * OUT, the PMC could have been released on another CPU. Therefore * context switch OUT always looks at the hardware state to turn * OFF PMCs and will update a PMC's saved value only if reachable * from the target process record. * * - OP PMCRELEASE could be called on a PMC at any time (the PMC could * be attached to many processes at the time of the call and could * be active on multiple CPUs). * * We prevent further scheduling of the PMC by marking it as in * state 'DELETED'. If the runcount of the PMC is non-zero then * this PMC is currently running on a CPU somewhere. The thread * doing the PMCRELEASE operation waits by repeatedly doing a * pause() till the runcount comes to zero. * * The contents of a PMC descriptor (struct pmc) are protected using * a spin-mutex. In order to save space, we use a mutex pool. * * In terms of lock types used by witness(4), we use: * - Type "pmc-sx", used by the global SX lock. * - Type "pmc-sleep", for sleep mutexes used by logger threads. * - Type "pmc-per-proc", for protecting PMC owner descriptors. * - Type "pmc-leaf", used for all other spin mutexes. */ /* * save the cpu binding of the current kthread */ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG(CPU,BND,2, "%s", "save-cpu"); thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; thread_unlock(curthread); PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } /* * restore the cpu binding of the current thread */ static void pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); thread_unlock(curthread); PMCDBG(CPU,BND,2, "%s", "restore-cpu done"); } /* * move execution over the specified cpu and bind it there. */ static void pmc_select_cpu(int cpu) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); /* Never move to an inactive CPU. */ KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " "CPU %d", __LINE__, cpu)); PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, cpu, curthread->td_oncpu)); PMCDBG(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); } /* * Force a context switch. * * We do this by pause'ing for 1 tick -- invoking mi_switch() is not * guaranteed to force a context switch. */ static void pmc_force_context_switch(void) { pause("pmcctx", 1); } /* * Get the file name for an executable. This is a simple wrapper * around vn_fullpath(9). */ static void pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) { *fullpath = "unknown"; *freepath = NULL; vn_fullpath(curthread, v, fullpath, freepath); } /* * remove an process owning PMCs */ void pmc_remove_owner(struct pmc_owner *po) { struct pmc *pm, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG(OWN,ORM,1, "remove-owner po=%p", po); /* Remove descriptor from the owner hash table */ LIST_REMOVE(po, po_next); /* release all owned PMC descriptors */ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { PMCDBG(OWN,ORM,2, "pmc=%p", pm); KASSERT(pm->pm_owner == po, ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); pmc_release_pmc_descriptor(pm); /* will unlink from the list */ pmc_destroy_pmc_descriptor(pm); } KASSERT(po->po_sscount == 0, ("[pmc,%d] SS count not zero", __LINE__)); KASSERT(LIST_EMPTY(&po->po_pmcs), ("[pmc,%d] PMC list not empty", __LINE__)); /* de-configure the log file if present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_deconfigure_log(po); } /* * remove an owner process record if all conditions are met. */ static void pmc_maybe_remove_owner(struct pmc_owner *po) { PMCDBG(OWN,OMR,1, "maybe-remove-owner po=%p", po); /* * Remove owner record if * - this process does not own any PMCs * - this process has not allocated a system-wide sampling buffer */ if (LIST_EMPTY(&po->po_pmcs) && ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } } /* * Add an association between a target process and a PMC. */ static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_target *pt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", __LINE__, pm, pp->pp_proc->p_pid)); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), ("[pmc,%d] Illegal reference count %d for process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); -#ifdef DEBUG +#ifdef HWPMC_DEBUG LIST_FOREACH(pt, &pm->pm_targets, pt_next) if (pt->pt_process == pp) KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", __LINE__, pp, pm)); #endif pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO); pt->pt_process = pp; LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, (uintptr_t)pm); if (pm->pm_owner->po_owner == pp->pp_proc) pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; /* * Initialize the per-process values at this row index. */ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? pm->pm_sc.pm_reloadcount : 0; pp->pp_refcnt++; } /* * Removes the association between a target process and a PMC. */ static void pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct proc *p; struct pmc_target *ptgt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, ri, pm, pp->pp_pmcs[ri].pp_pmc)); pp->pp_pmcs[ri].pp_pmc = NULL; pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; /* Remove owner-specific flags */ if (pm->pm_owner->po_owner == pp->pp_proc) { pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; } pp->pp_refcnt--; /* Remove the target process from the PMC structure */ LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) if (ptgt->pt_process == pp) break; KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); LIST_REMOVE(ptgt, pt_next); free(ptgt, M_PMC); /* if the PMC now lacks targets, send the owner a SIGIO */ if (LIST_EMPTY(&pm->pm_targets)) { p = pm->pm_owner->po_owner; PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); PMCDBG(PRC,SIG,2, "signalling proc=%p signal=%d", p, SIGIO); } } /* * Check if PMC 'pm' may be attached to target process 't'. */ static int pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ int decline_attach, i; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) return 0; PROC_LOCK(o); oc = o->p_ucred; crhold(oc); PROC_UNLOCK(o); PROC_LOCK(t); tc = t->p_ucred; crhold(tc); PROC_UNLOCK(t); /* * The effective uid of the PMC owner should match at least one * of the {effective,real,saved} uids of the target process. */ decline_attach = oc->cr_uid != tc->cr_uid && oc->cr_uid != tc->cr_svuid && oc->cr_uid != tc->cr_ruid; /* * Every one of the target's group ids, must be in the owner's * group list. */ for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); /* check the read and saved gids too */ if (decline_attach == 0) decline_attach = !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); return !decline_attach; } /* * Attach a process to a PMC. */ static int pmc_attach_one_process(struct proc *p, struct pmc *pm) { int ri; char *fullpath, *freepath; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * Locate the process descriptor corresponding to process 'p', * allocating space as needed. * * Verify that rowindex 'pm_rowindex' is free in the process * descriptor. * * If not, allocate space for a descriptor and link the * process descriptor and PMC. */ ri = PMC_TO_ROWINDEX(pm); if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) return ENOMEM; if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */ return EEXIST; if (pp->pp_pmcs[ri].pp_pmc != NULL) return EBUSY; pmc_link_target_process(pm, pp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) pm->pm_flags |= PMC_F_NEEDS_LOGFILE; pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ /* issue an attach event to a configured log file */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { pmc_getfilename(p->p_textvp, &fullpath, &freepath); if (p->p_flag & P_KTHREAD) { fullpath = kernelname; freepath = NULL; } else pmclog_process_pmcattach(pm, p->p_pid, fullpath); if (freepath) free(freepath, M_TEMP); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_process_mappings(pm->pm_owner, p); } /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Attach a process and optionally its children */ static int pmc_attach_process(struct proc *p, struct pmc *pm) { int error; struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * If this PMC successfully allowed a GETMSR operation * in the past, disallow further ATTACHes. */ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) return EPERM; if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_attach_one_process(p, pm); /* * Traverse all child processes, attaching them to * this PMC. */ sx_slock(&proctree_lock); top = p; for (;;) { if ((error = pmc_attach_one_process(p, pm)) != 0) break; if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } if (error) (void) pmc_detach_process(top, pm); done: sx_sunlock(&proctree_lock); return error; } /* * Detach a process from a PMC. If there are no other PMCs tracking * this process, remove the process structure from its hash table. If * 'flags' contains PMC_FLAG_REMOVE, then free the process structure. */ static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) { int ri; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL, ("[pmc,%d] null pm pointer", __LINE__)); ri = PMC_TO_ROWINDEX(pm); PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", pm, ri, p, p->p_pid, p->p_comm, flags); if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) return ESRCH; if (pp->pp_pmcs[ri].pp_pmc != pm) return EINVAL; pmc_unlink_target_process(pm, pp); /* Issue a detach entry if a log file is configured */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcdetach(pm, p->p_pid); /* * If there are no PMCs targetting this process, we remove its * descriptor from the target hash table and unset the P_HWPMC * flag in the struct proc. */ KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal refcnt %d for process struct %p", __LINE__, pp->pp_refcnt, pp)); if (pp->pp_refcnt != 0) /* still a target of some PMC */ return 0; pmc_remove_process_descriptor(pp); if (flags & PMC_FLAG_REMOVE) free(pp, M_PMC); PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Detach a process and optionally its descendants from a PMC. */ static int pmc_detach_process(struct proc *p, struct pmc *pm) { struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); /* * Traverse all children, detaching them from this PMC. We * ignore errors since we could be detaching a PMC from a * partially attached proc tree. */ sx_slock(&proctree_lock); top = p; for (;;) { (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); if (LIST_EMPTY(&pm->pm_targets)) pm->pm_flags &= ~PMC_F_ATTACH_DONE; return 0; } /* * Thread context switch IN */ static void pmc_process_csw_in(struct thread *td) { int cpu; unsigned int adjri, ri; struct pmc *pm; struct proc *p; struct pmc_cpu *pc; struct pmc_hw *phw; pmc_value_t newvalue; struct pmc_process *pp; struct pmc_classdep *pcd; p = td->td_proc; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) return; KASSERT(pp->pp_proc == td->td_proc, ("[pmc,%d] not my thread state", __LINE__)); critical_enter(); /* no preemption from this point */ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] wierd CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Target PMC in non-virtual mode (%d)", __LINE__, PMC_TO_MODE(pm))); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] Row index mismatch pmc %d != ri %d", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Only PMCs that are marked as 'RUNNING' need * be placed on hardware. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* increment PMC runcount */ atomic_add_rel_int(&pm->pm_runcount, 1); /* configure the HWPMC we are going to use. */ pcd = pmc_ri_to_classdep(md, ri, &adjri); pcd->pcd_config_pmc(cpu, adjri, pm); phw = pc->pc_hwpmcs[ri]; KASSERT(phw != NULL, ("[pmc,%d] null hw pointer", __LINE__)); KASSERT(phw->phw_pmc == pm, ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, phw->phw_pmc, pm)); /* * Write out saved value and start the PMC. * * Sampling PMCs use a per-process value, while * counting mode PMCs use a per-pmc value that is * inherited across descendants. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu,ri) = pp->pp_pmcs[ri].pp_pmcval; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, PMC_TO_MODE(pm))); mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu, ri) = pm->pm_gv.pm_savedvalue; mtx_pool_unlock_spin(pmc_mtxpool, pm); } PMCDBG(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); pcd->pcd_write_pmc(cpu, adjri, newvalue); pcd->pcd_start_pmc(cpu, adjri); } /* * perform any other architecture/cpu dependent thread * switch-in actions. */ (void) (*md->pmd_switch_in)(pc, pp); critical_exit(); } /* * Thread context switch OUT. */ static void pmc_process_csw_out(struct thread *td) { int cpu; int64_t tmp; struct pmc *pm; struct proc *p; enum pmc_mode mode; struct pmc_cpu *pc; pmc_value_t newvalue; unsigned int adjri, ri; struct pmc_process *pp; struct pmc_classdep *pcd; /* * Locate our process descriptor; this may be NULL if * this process is exiting and we have already removed * the process from the target process table. * * Note that due to kernel preemption, multiple * context switches may happen while the process is * exiting. * * Note also that if the target process cannot be * found we still need to deconfigure any PMCs that * are currently running on hardware. */ p = td->td_proc; pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); /* * save PMCs */ critical_enter(); cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d wierd CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; /* * When a PMC gets unlinked from a target PMC, it will * be removed from the target's pp_pmc[] array. * * However, on a MP system, the target could have been * executing on another CPU at the time of the unlink. * So, at context switch OUT time, we need to look at * the hardware to determine if a PMC is scheduled on * it. */ for (ri = 0; ri < md->pmd_npmc; ri++) { pcd = pmc_ri_to_classdep(md, ri, &adjri); pm = NULL; (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL) /* nothing at this row index */ continue; mode = PMC_TO_MODE(pm); if (!PMC_IS_VIRTUAL_MODE(mode)) continue; /* not a process virtual PMC */ KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* Stop hardware if not already stopped */ if (pm->pm_stalled == 0) pcd->pcd_stop_pmc(cpu, adjri); /* reduce this PMC's runcount */ atomic_subtract_rel_int(&pm->pm_runcount, 1); /* * If this PMC is associated with this process, * save the reading. */ if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pp->pp_refcnt > 0, ("[pmc,%d] pp refcnt = %d", __LINE__, pp->pp_refcnt)); pcd->pcd_read_pmc(cpu, adjri, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); PMCDBG(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd", cpu, ri, tmp); if (mode == PMC_MODE_TS) { /* * For sampling process-virtual PMCs, * we expect the count to be * decreasing as the 'value' * programmed into the PMC is the * number of events to be seen till * the next sampling interrupt. */ if (tmp < 0) tmp += pm->pm_sc.pm_reloadcount; mtx_pool_lock_spin(pmc_mtxpool, pm); pp->pp_pmcs[ri].pp_pmcval -= tmp; if ((int64_t) pp->pp_pmcs[ri].pp_pmcval < 0) pp->pp_pmcs[ri].pp_pmcval += pm->pm_sc.pm_reloadcount; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* * For counting process-virtual PMCs, * we expect the count to be * increasing monotonically, modulo a 64 * bit wraparound. */ KASSERT((int64_t) tmp >= 0, ("[pmc,%d] negative increment cpu=%d " "ri=%d newvalue=%jx saved=%jx " "incr=%jx", __LINE__, cpu, ri, newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); if (pm->pm_flags & PMC_F_LOG_PROCCSW) pmclog_process_proccsw(pm, pp, tmp); } } /* mark hardware as free */ pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * perform any other architecture/cpu dependent thread * switch out functions. */ (void) (*md->pmd_switch_out)(pc, pp); critical_exit(); } /* * A mapping change for a process. */ static void pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) { int ri; pid_t pid; char *fullpath, *freepath; const struct pmc *pm; struct pmc_owner *po; const struct pmc_process *pp; freepath = fullpath = NULL; pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; /* Inform owners of all system-wide sampling PMCs. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; /* * Inform sampling PMC owners tracking this process. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); done: if (freepath) free(freepath, M_TEMP); } /* * Log an munmap request. */ static void pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) { int ri; pid_t pid; struct pmc_owner *po; const struct pmc *pm; const struct pmc_process *pp; pid = td->td_proc->p_pid; LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } /* * Log mapping information about the kernel. */ static void pmc_log_kernel_mappings(struct pmc *pm) { struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; sx_assert(&pmc_sx, SX_LOCKED); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); po = pm->pm_owner; if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) return; /* * Log the current set of kernel modules. */ kmbase = linker_hwpmc_list_objects(); for (km = kmbase; km->pm_file != NULL; km++) { PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file, (void *) km->pm_address); pmclog_process_map_in(po, (pid_t) -1, km->pm_address, km->pm_file); } free(kmbase, M_LINKER); po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; } /* * Log the mappings for a single process. */ static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) { vm_map_t map; struct vnode *vp; struct vmspace *vm; vm_map_entry_t entry; vm_offset_t last_end; u_int last_timestamp; struct vnode *last_vp; vm_offset_t start_addr; vm_object_t obj, lobj, tobj; char *fullpath, *freepath; last_vp = NULL; last_end = (vm_offset_t) 0; fullpath = freepath = NULL; if ((vm = vmspace_acquire_ref(p)) == NULL) return; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry == NULL) { PMCDBG(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " "NULL! pid=%d vm_map=%p\n", p->p_pid, map); break; } /* * We only care about executable map entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || !(entry->protection & VM_PROT_EXECUTE) || (entry->object.vm_object == NULL)) { continue; } obj = entry->object.vm_object; VM_OBJECT_RLOCK(obj); /* * Walk the backing_object list to find the base * (non-shadowed) vm_object. */ for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } /* * At this point lobj is the base vm_object and it is locked. */ if (lobj == NULL) { PMCDBG(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d " "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj); VM_OBJECT_RUNLOCK(obj); continue; } if (lobj->type != OBJT_VNODE || lobj->handle == NULL) { if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * Skip contiguous regions that point to the same * vnode, so we don't emit redundant MAP-IN * directives. */ if (entry->start == last_end && lobj->handle == last_vp) { last_end = entry->end; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * We don't want to keep the proc's vm_map or this * vm_object locked while we walk the pathname, since * vn_fullpath() can sleep. However, if we drop the * lock, it's possible for concurrent activity to * modify the vm_map list. To protect against this, * we save the vm_map timestamp before we release the * lock, and check it after we reacquire the lock * below. */ start_addr = entry->start; last_end = entry->end; last_timestamp = map->timestamp; vm_map_unlock_read(map); vp = lobj->handle; vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); freepath = NULL; pmc_getfilename(vp, &fullpath, &freepath); last_vp = vp; vrele(vp); vp = NULL; pmclog_process_map_in(po, p->p_pid, start_addr, fullpath); if (freepath) free(freepath, M_TEMP); vm_map_lock_read(map); /* * If our saved timestamp doesn't match, this means * that the vm_map was modified out from under us and * we can't trust our current "entry" pointer. Do a * new lookup for this entry. If there is no entry * for this address range, vm_map_lookup_entry() will * return the previous one, so we always want to go to * entry->next on the next loop iteration. * * There is an edge condition here that can occur if * there is no entry at or before this address. In * this situation, vm_map_lookup_entry returns * &map->header, which would cause our loop to abort * without processing the rest of the map. However, * in practice this will never happen for process * vm_map. This is because the executable's text * segment is the first mapping in the proc's address * space, and this mapping is never removed until the * process exits, so there will always be a non-header * entry at or before the requested address for * vm_map_lookup_entry to return. */ if (map->timestamp != last_timestamp) vm_map_lookup_entry(map, last_end - 1, &entry); } vm_map_unlock_read(map); vmspace_free(vm); return; } /* * Log mappings for all processes in the system. */ static void pmc_log_all_process_mappings(struct pmc_owner *po) { struct proc *p, *top; sx_assert(&pmc_sx, SX_XLOCKED); if ((p = pfind(1)) == NULL) panic("[pmc,%d] Cannot find init", __LINE__); PROC_UNLOCK(p); sx_slock(&proctree_lock); top = p; for (;;) { pmc_log_process_mappings(po, p); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); } /* * The 'hook' invoked from the kernel proper */ -#ifdef DEBUG +#ifdef HWPMC_DEBUG const char *pmc_hooknames[] = { /* these strings correspond to PMC_FN_* in */ "", "EXEC", "CSW-IN", "CSW-OUT", "SAMPLE", "UNUSED1", "UNUSED2", "MMAP", "MUNMAP", "CALLCHAIN-NMI", "CALLCHAIN-SOFT", "SOFTSAMPLING" }; #endif static int pmc_hook_handler(struct thread *td, int function, void *arg) { PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, pmc_hooknames[function], arg); switch (function) { /* * Process exec() */ case PMC_FN_PROCESS_EXEC: { char *fullpath, *freepath; unsigned int ri; int is_using_hwpmcs; struct pmc *pm; struct proc *p; struct pmc_owner *po; struct pmc_process *pp; struct pmckern_procexec *pk; sx_assert(&pmc_sx, SX_XLOCKED); p = td->td_proc; pmc_getfilename(p->p_textvp, &fullpath, &freepath); pk = (struct pmckern_procexec *) arg; /* Inform owners of SS mode PMCs of the exec event. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); if (!is_using_hwpmcs) { if (freepath) free(freepath, M_TEMP); break; } /* * PMCs are not inherited across an exec(): remove any * PMCs that this process is the owner of. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } /* * If the process being exec'ed is not the target of any * PMC, we are done. */ if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { if (freepath) free(freepath, M_TEMP); break; } /* * Log the exec event to all monitoring owners. Skip * owners who have already recieved the event because * they had system sampling PMCs active. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, pm->pm_id, p->p_pid, pk->pm_entryaddr, fullpath); } if (freepath) free(freepath, M_TEMP); PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", p, p->p_pid, p->p_comm, pk->pm_credentialschanged); if (pk->pm_credentialschanged == 0) /* no change */ break; /* * If the newly exec()'ed process has a different credential * than before, allow it to be the target of a PMC only if * the PMC's owner has sufficient priviledge. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) if (pmc_can_attach(pm, td->td_proc) != 0) pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__, pp->pp_refcnt, pp)); /* * If this process is no longer the target of any * PMCs, we can remove the process entry and free * up space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); free(pp, M_PMC); break; } } break; case PMC_FN_CSW_IN: pmc_process_csw_in(td); break; case PMC_FN_CSW_OUT: pmc_process_csw_out(td); break; /* * Process accumulated PC samples. * * This function is expected to be called by hardclock() for * each CPU that has accumulated PC samples. * * This function is to be executed on the CPU whose samples * are being processed. */ case PMC_FN_DO_SAMPLES: /* * Clear the cpu specific bit in the CPU mask before * do the rest of the processing. If the NMI handler * gets invoked after the "atomic_clear_int()" call * below but before "pmc_process_samples()" gets * around to processing the interrupt, then we will * come back here at the next hardclock() tick (and * may find nothing to do if "pmc_process_samples()" * had already processed the interrupt). We don't * lose the interrupt sample. */ CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid), PMC_HR); pmc_process_samples(PCPU_GET(cpuid), PMC_SR); break; case PMC_FN_MMAP: sx_assert(&pmc_sx, SX_LOCKED); pmc_process_mmap(td, (struct pmckern_map_in *) arg); break; case PMC_FN_MUNMAP: sx_assert(&pmc_sx, SX_LOCKED); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; case PMC_FN_USER_CALLCHAIN: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *) arg); td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_USER_CALLCHAIN_SOFT: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR, (struct trapframe *) arg); td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_SOFT_SAMPLING: /* * Call soft PMC sampling intr. */ pmc_soft_intr((struct pmckern_soft *) arg); break; default: -#ifdef DEBUG +#ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); #endif break; } return 0; } /* * allocate a 'struct pmc_owner' descriptor in the owner hash table. */ static struct pmc_owner * pmc_allocate_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; /* allocate space for N pointers and one descriptor struct */ po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO); po->po_owner = p; LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ TAILQ_INIT(&po->po_logbuffers); mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", p, p->p_pid, p->p_comm, po); return po; } static void pmc_destroy_owner_descriptor(struct pmc_owner *po) { PMCDBG(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); mtx_destroy(&po->po_mtx); free(po, M_PMC); } /* * find the descriptor corresponding to process 'p', adding or removing it * as specified by 'mode'. */ static struct pmc_process * pmc_find_process_descriptor(struct proc *p, uint32_t mode) { uint32_t hindex; struct pmc_process *pp, *ppnew; struct pmc_processhash *pph; hindex = PMC_HASH_PTR(p, pmc_processhashmask); pph = &pmc_processhash[hindex]; ppnew = NULL; /* * Pre-allocate memory in the FIND_ALLOCATE case since we * cannot call malloc(9) once we hold a spin lock. */ if (mode & PMC_FLAG_ALLOCATE) ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO); mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) if (pp->pp_proc == p) break; if ((mode & PMC_FLAG_REMOVE) && pp != NULL) LIST_REMOVE(pp, pp_next); if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && ppnew != NULL) { ppnew->pp_proc = p; LIST_INSERT_HEAD(pph, ppnew, pp_next); pp = ppnew; ppnew = NULL; } mtx_unlock_spin(&pmc_processhash_mtx); if (pp != NULL && ppnew != NULL) free(ppnew, M_PMC); return pp; } /* * remove a process descriptor from the process hash table. */ static void pmc_remove_process_descriptor(struct pmc_process *pp) { KASSERT(pp->pp_refcnt == 0, ("[pmc,%d] Removing process descriptor %p with count %d", __LINE__, pp, pp->pp_refcnt)); mtx_lock_spin(&pmc_processhash_mtx); LIST_REMOVE(pp, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); } /* * find an owner descriptor corresponding to proc 'p' */ static struct pmc_owner * pmc_find_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; po = NULL; LIST_FOREACH(po, poh, po_next) if (po->po_owner == p) break; PMCDBG(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); return po; } /* * pmc_allocate_pmc_descriptor * * Allocate a pmc descriptor and initialize its * fields. */ static struct pmc * pmc_allocate_pmc_descriptor(void) { struct pmc *pmc; pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO); PMCDBG(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); return pmc; } /* * Destroy a pmc descriptor. */ static void pmc_destroy_pmc_descriptor(struct pmc *pm) { KASSERT(pm->pm_state == PMC_STATE_DELETED || pm->pm_state == PMC_STATE_FREE, ("[pmc,%d] destroying non-deleted PMC", __LINE__)); KASSERT(LIST_EMPTY(&pm->pm_targets), ("[pmc,%d] destroying pmc with targets", __LINE__)); KASSERT(pm->pm_owner == NULL, ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); KASSERT(pm->pm_runcount == 0, ("[pmc,%d] pmc has non-zero run count %d", __LINE__, pm->pm_runcount)); free(pm, M_PMC); } static void pmc_wait_for_pmc_idle(struct pmc *pm) { -#ifdef DEBUG +#ifdef HWPMC_DEBUG volatile int maxloop; maxloop = 100 * pmc_cpu_max(); #endif /* * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. */ while (atomic_load_acq_32(&pm->pm_runcount) > 0) { -#ifdef DEBUG +#ifdef HWPMC_DEBUG maxloop--; KASSERT(maxloop > 0, ("[pmc,%d] (ri%d, rc%d) waiting too long for " "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm), pm->pm_runcount)); #endif pmc_force_context_switch(); } } /* * This function does the following things: * * - detaches the PMC from hardware * - unlinks all target threads that were attached to it * - removes the PMC from its owner's list * - destroys the PMC private mutex * * Once this function completes, the given pmc pointer can be freed by * calling pmc_destroy_pmc_descriptor(). */ static void pmc_release_pmc_descriptor(struct pmc *pm) { enum pmc_mode mode; struct pmc_hw *phw; u_int adjri, ri, cpu; struct pmc_owner *po; struct pmc_binding pb; struct pmc_process *pp; struct pmc_classdep *pcd; struct pmc_target *ptgt, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mode = PMC_TO_MODE(pm); PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, mode); /* * First, we take the PMC off hardware. */ cpu = 0; if (PMC_IS_SYSTEM_MODE(mode)) { /* * A system mode PMC runs on a specific CPU. Switch * to this CPU and turn hardware off. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); pmc_select_cpu(cpu); /* switch off non-stalled CPUs */ if (pm->pm_state == PMC_STATE_RUNNING && pm->pm_stalled == 0) { phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; KASSERT(phw->phw_pmc == pm, ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", __LINE__, ri, phw->phw_pmc, pm)); PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_stop_pmc(cpu, adjri); critical_exit(); } PMCDBG(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_config_pmc(cpu, adjri, NULL); critical_exit(); /* adjust the global and process count of SS mode PMCs */ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { po = pm->pm_owner; po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); LIST_REMOVE(po, po_ssnext); } } pm->pm_state = PMC_STATE_DELETED; pmc_restore_cpu_binding(&pb); /* * We could have references to this PMC structure in * the per-cpu sample queues. Wait for the queue to * drain. */ pmc_wait_for_pmc_idle(pm); } else if (PMC_IS_VIRTUAL_MODE(mode)) { /* * A virtual PMC could be running on multiple CPUs at * a given instant. * * By marking its state as DELETED, we ensure that * this PMC is never further scheduled on hardware. * * Then we wait till all CPUs are done with this PMC. */ pm->pm_state = PMC_STATE_DELETED; /* Wait for the PMCs runcount to come to zero. */ pmc_wait_for_pmc_idle(pm); /* * At this point the PMC is off all CPUs and cannot be * freshly scheduled onto a CPU. It is now safe to * unlink all targets from this PMC. If a * process-record's refcount falls to zero, we remove * it from the hash table. The module-wide SX lock * protects us from races. */ LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { pp = ptgt->pt_process; pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ PMCDBG(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); /* * If the target process record shows that no * PMCs are attached to it, reclaim its space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); free(pp, M_PMC); } } cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ } /* * Release any MD resources */ (void) pcd->pcd_release_pmc(cpu, adjri, pm); /* * Update row disposition */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) PMC_UNMARK_ROW_STANDALONE(ri); else PMC_UNMARK_ROW_THREAD(ri); /* unlink from the owner's list */ if (pm->pm_owner) { LIST_REMOVE(pm, pm_next); pm->pm_owner = NULL; } } /* * Register an owner and a pmc. */ static int pmc_register_owner(struct proc *p, struct pmc *pmc) { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) return ENOMEM; KASSERT(pmc->pm_owner == NULL, ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); pmc->pm_owner = po; LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcallocate(pmc); PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", po, pmc); return 0; } /* * Return the current row disposition: * == 0 => FREE * > 0 => PROCESS MODE * < 0 => SYSTEM MODE */ int pmc_getrowdisp(int ri) { return pmc_pmcdisp[ri]; } /* * Check if a PMC at row index 'ri' can be allocated to the current * process. * * Allocation can fail if: * - the current process is already being profiled by a PMC at index 'ri', * attached to it via OP_PMCATTACH. * - the current process has already allocated a PMC at index 'ri' * via OP_ALLOCATE. */ static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) { enum pmc_mode mode; struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); /* * We shouldn't have already allocated a process-mode PMC at * row index 'ri'. * * We shouldn't have allocated a system-wide PMC on the same * CPU and same RI. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (PMC_TO_ROWINDEX(pm) == ri) { mode = PMC_TO_MODE(pm); if (PMC_IS_VIRTUAL_MODE(mode)) return EEXIST; if (PMC_IS_SYSTEM_MODE(mode) && (int) PMC_TO_CPU(pm) == cpu) return EEXIST; } } /* * We also shouldn't be the target of any PMC at this index * since otherwise a PMC_ATTACH to ourselves will fail. */ if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) if (pp->pp_pmcs[ri].pp_pmc) return EEXIST; PMCDBG(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", p, p->p_pid, p->p_comm, ri); return 0; } /* * Check if a given PMC at row index 'ri' can be currently used in * mode 'mode'. */ static int pmc_can_allocate_row(int ri, enum pmc_mode mode) { enum pmc_disp disp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); if (PMC_IS_SYSTEM_MODE(mode)) disp = PMC_DISP_STANDALONE; else disp = PMC_DISP_THREAD; /* * check disposition for PMC row 'ri': * * Expected disposition Row-disposition Result * * STANDALONE STANDALONE or FREE proceed * STANDALONE THREAD fail * THREAD THREAD or FREE proceed * THREAD STANDALONE fail */ if (!PMC_ROW_DISP_IS_FREE(ri) && !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) return EBUSY; /* * All OK */ PMCDBG(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); return 0; } /* * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. */ static struct pmc * pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) { struct pmc *pm; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_id == pmcid) return pm; return NULL; } static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) { struct pmc *pm; struct pmc_owner *po; PMCDBG(PMC,FND,1, "find-pmc id=%d", pmcid); if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) return ESRCH; if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) return EINVAL; PMCDBG(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); *pmc = pm; return 0; } /* * Start a PMC. */ static int pmc_start(struct pmc *pm) { enum pmc_mode mode; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, error, cpu, ri; KASSERT(pm != NULL, ("[pmc,%d] null pm", __LINE__)); mode = PMC_TO_MODE(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); error = 0; PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); po = pm->pm_owner; /* * Disallow PMCSTART if a logfile is required but has not been * configured yet. */ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EDOOFUS); /* programming error */ /* * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { /* * If a PMCATTACH has never been done on this PMC, * attach it to its owner process. */ if (LIST_EMPTY(&pm->pm_targets)) error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : pmc_attach_process(po->po_owner, pm); /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ if (error == 0) { pm->pm_state = PMC_STATE_RUNNING; if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) pmc_force_context_switch(); } return (error); } /* * A system-wide PMC. * * Add the owner to the global list if this is a system-wide * sampling PMC. */ if (mode == PMC_MODE_SS) { if (po->po_sscount == 0) { LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); atomic_add_rel_int(&pmc_ss_count, 1); PMCDBG(PMC,OPS,1, "po=%p in global list", po); } po->po_sscount++; /* * Log mapping information for all existing processes in the * system. Subsequent mappings are logged as they happen; * see pmc_process_mmap(). */ if (po->po_logprocmaps == 0) { pmc_log_all_process_mappings(po); po->po_logprocmaps = 1; } } /* * Move to the CPU associated with this * PMC, and start the hardware. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); /* * global PMCs are configured at allocation time * so write out the initial value and start the PMC. */ pm->pm_state = PMC_STATE_RUNNING; critical_enter(); if ((error = pcd->pcd_write_pmc(cpu, adjri, PMC_IS_SAMPLING_MODE(mode) ? pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial)) == 0) error = pcd->pcd_start_pmc(cpu, adjri); critical_exit(); pmc_restore_cpu_binding(&pb); return (error); } /* * Stop a PMC. */ static int pmc_stop(struct pmc *pm) { struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, cpu, error, ri; KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); pm->pm_state = PMC_STATE_STOPPED; /* * If the PMC is a virtual mode one, changing the state to * non-RUNNING is enough to ensure that the PMC never gets * scheduled. * * If this PMC is current running on a CPU, then it will * handled correctly at the time its target process is context * switched out. */ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) return 0; /* * A system-mode PMC. Move to the CPU associated with * this PMC, and stop the hardware. We update the * 'initial count' so that a subsequent PMCSTART will * resume counting from the current hardware count. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); if (!pmc_cpu_is_active(cpu)) return ENXIO; pmc_select_cpu(cpu); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); critical_enter(); if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0) error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial); critical_exit(); pmc_restore_cpu_binding(&pb); po = pm->pm_owner; /* remove this owner from the global list of SS PMC owners */ if (PMC_TO_MODE(pm) == PMC_MODE_SS) { po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); LIST_REMOVE(po, po_ssnext); PMCDBG(PMC,OPS,2,"po=%p removed from global list", po); } } return (error); } -#ifdef DEBUG +#ifdef HWPMC_DEBUG static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , __PMC_OPS() NULL }; #endif /* * The syscall interface */ #define PMC_GET_SX_XLOCK(...) do { \ sx_xlock(&pmc_sx); \ if (pmc_hook == NULL) { \ sx_xunlock(&pmc_sx); \ return __VA_ARGS__; \ } \ } while (0) #define PMC_DOWNGRADE_SX() do { \ sx_downgrade(&pmc_sx); \ is_sx_downgraded = 1; \ } while (0) static int pmc_syscall_handler(struct thread *td, void *syscall_args) { int error, is_sx_downgraded, is_sx_locked, op; struct pmc_syscall_args *c; void *arg; PMC_GET_SX_XLOCK(ENOSYS); DROP_GIANT(); is_sx_downgraded = 0; is_sx_locked = 1; c = (struct pmc_syscall_args *) syscall_args; op = c->pmop_code; arg = c->pmop_data; PMCDBG(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); error = 0; atomic_add_int(&pmc_stats.pm_syscalls, 1); switch(op) { /* * Configure a log file. * * XXX This OP will be reworked. */ case PMC_OP_CONFIGURELOG: { struct proc *p; struct pmc *pm; struct pmc_owner *po; struct pmc_op_configurelog cl; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &cl, sizeof(cl))) != 0) break; /* mark this process as owning a log file */ p = td->td_proc; if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { error = ENOMEM; break; } /* * If a valid fd was passed in, try to configure that, * otherwise if 'fd' was less than zero and there was * a log file configured, flush its buffers and * de-configure it. */ if (cl.pm_logfd >= 0) { sx_xunlock(&pmc_sx); is_sx_locked = 0; error = pmclog_configure_log(md, po, cl.pm_logfd); } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_closelog(po); error = pmclog_close(po); if (error == 0) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && pm->pm_state == PMC_STATE_RUNNING) pmc_stop(pm); error = pmclog_deconfigure_log(po); } } else error = EINVAL; if (error) break; } break; /* * Flush a log file. */ case PMC_OP_FLUSHLOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_flush(po); } break; /* * Close a log file. */ case PMC_OP_CLOSELOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_close(po); } break; /* * Retrieve hardware configuration. */ case PMC_OP_GETCPUINFO: /* CPU information */ { struct pmc_op_getcpuinfo gci; struct pmc_classinfo *pci; struct pmc_classdep *pcd; int cl; gci.pm_cputype = md->pmd_cputype; gci.pm_ncpu = pmc_cpu_max(); gci.pm_npmc = md->pmd_npmc; gci.pm_nclass = md->pmd_nclass; pci = gci.pm_classes; pcd = md->pmd_classdep; for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { pci->pm_caps = pcd->pcd_caps; pci->pm_class = pcd->pcd_class; pci->pm_width = pcd->pcd_width; pci->pm_num = pcd->pcd_num; } error = copyout(&gci, arg, sizeof(gci)); } break; /* * Retrieve soft events list. */ case PMC_OP_GETDYNEVENTINFO: { enum pmc_class cl; enum pmc_event ev; struct pmc_op_getdyneventinfo *gei; struct pmc_dyn_event_descr dev; struct pmc_soft *ps; uint32_t nevent; sx_assert(&pmc_sx, SX_LOCKED); gei = (struct pmc_op_getdyneventinfo *) arg; if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) break; /* Only SOFT class is dynamic. */ if (cl != PMC_CLASS_SOFT) { error = EINVAL; break; } nevent = 0; for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { ps = pmc_soft_ev_acquire(ev); if (ps == NULL) continue; bcopy(&ps->ps_ev, &dev, sizeof(dev)); pmc_soft_ev_release(ps); error = copyout(&dev, &gei->pm_events[nevent], sizeof(struct pmc_dyn_event_descr)); if (error != 0) break; nevent++; } if (error != 0) break; error = copyout(&nevent, &gei->pm_nevent, sizeof(nevent)); } break; /* * Get module statistics */ case PMC_OP_GETDRIVERSTATS: { struct pmc_op_getdriverstats gms; bcopy(&pmc_stats, &gms, sizeof(gms)); error = copyout(&gms, arg, sizeof(gms)); } break; /* * Retrieve module version number */ case PMC_OP_GETMODULEVERSION: { uint32_t cv, modv; /* retrieve the client's idea of the ABI version */ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) break; /* don't service clients newer than our driver */ modv = PMC_VERSION; if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { error = EPROGMISMATCH; break; } error = copyout(&modv, arg, sizeof(int)); } break; /* * Retrieve the state of all the PMCs on a given * CPU. */ case PMC_OP_GETPMCINFO: { int ari; struct pmc *pm; size_t pmcinfo_size; uint32_t cpu, n, npmc; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_info *p, *pmcinfo; struct pmc_op_getpmcinfo *gpi; PMC_DOWNGRADE_SX(); gpi = (struct pmc_op_getpmcinfo *) arg; if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) break; if (cpu >= pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* switch to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); npmc = md->pmd_npmc; pmcinfo_size = npmc * sizeof(struct pmc_info); pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); p = pmcinfo; for (n = 0; n < md->pmd_npmc; n++, p++) { pcd = pmc_ri_to_classdep(md, n, &ari); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) break; if (PMC_ROW_DISP_IS_STANDALONE(n)) p->pm_rowdisp = PMC_DISP_STANDALONE; else if (PMC_ROW_DISP_IS_THREAD(n)) p->pm_rowdisp = PMC_DISP_THREAD; else p->pm_rowdisp = PMC_DISP_FREE; p->pm_ownerpid = -1; if (pm == NULL) /* no PMC associated */ continue; po = pm->pm_owner; KASSERT(po->po_owner != NULL, ("[pmc,%d] pmc_owner had a null proc pointer", __LINE__)); p->pm_ownerpid = po->po_owner->p_pid; p->pm_mode = PMC_TO_MODE(pm); p->pm_event = pm->pm_event; p->pm_flags = pm->pm_flags; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) p->pm_reloadcount = pm->pm_sc.pm_reloadcount; } pmc_restore_cpu_binding(&pb); /* now copy out the PMC info collected */ if (error == 0) error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); free(pmcinfo, M_PMC); } break; /* * Set the administrative state of a PMC. I.e. whether * the PMC is to be used or not. */ case PMC_OP_PMCADMIN: { int cpu, ri; enum pmc_state request; struct pmc_cpu *pc; struct pmc_hw *phw; struct pmc_op_pmcadmin pma; struct pmc_binding pb; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); error = priv_check(td, PRIV_PMC_MANAGE); if (error) break; if ((error = copyin(arg, &pma, sizeof(pma))) != 0) break; cpu = pma.pm_cpu; if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } request = pma.pm_state; if (request != PMC_STATE_DISABLED && request != PMC_STATE_FREE) { error = EINVAL; break; } ri = pma.pm_pmc; /* pmc id == row index */ if (ri < 0 || ri >= (int) md->pmd_npmc) { error = EINVAL; break; } /* * We can't disable a PMC with a row-index allocated * for process virtual PMCs. */ if (PMC_ROW_DISP_IS_THREAD(ri) && request == PMC_STATE_DISABLED) { error = EBUSY; break; } /* * otherwise, this PMC on this CPU is either free or * in system-wide mode. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); pc = pmc_pcpu[cpu]; phw = pc->pc_hwpmcs[ri]; /* * XXX do we need some kind of 'forced' disable? */ if (phw->phw_pmc == NULL) { if (request == PMC_STATE_DISABLED && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; PMC_MARK_ROW_STANDALONE(ri); } else if (request == PMC_STATE_FREE && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; PMC_UNMARK_ROW_STANDALONE(ri); } /* other cases are a no-op */ } else error = EBUSY; pmc_restore_cpu_binding(&pb); } break; /* * Allocate a PMC. */ case PMC_OP_PMCALLOCATE: { int adjri, n; u_int cpu; uint32_t caps; struct pmc *pmc; enum pmc_mode mode; struct pmc_hw *phw; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_op_pmcallocate pa; if ((error = copyin(arg, &pa, sizeof(pa))) != 0) break; caps = pa.pm_caps; mode = pa.pm_mode; cpu = pa.pm_cpu; if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && mode != PMC_MODE_TS && mode != PMC_MODE_TC) || (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { error = EINVAL; break; } /* * Virtual PMCs should only ask for a default CPU. * System mode PMCs need to specify a non-default CPU. */ if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { error = EINVAL; break; } /* * Check that an inactive CPU is not being asked for. */ if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* * Refuse an allocation for a system-wide PMC if this * process has been jailed, or if this process lacks * super-user credentials and the sysctl tunable * 'security.bsd.unprivileged_syspmcs' is zero. */ if (PMC_IS_SYSTEM_MODE(mode)) { if (jailed(curthread->td_ucred)) { error = EPERM; break; } if (!pmc_unprivileged_syspmcs) { error = priv_check(curthread, PRIV_PMC_SYSTEM); if (error) break; } } /* * Look for valid values for 'pm_flags' */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { error = EINVAL; break; } /* process logging options are not allowed for system PMCs */ if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { error = EINVAL; break; } /* * All sampling mode PMCs need to be able to interrupt the * CPU. */ if (PMC_IS_SAMPLING_MODE(mode)) caps |= PMC_CAP_INTERRUPT; /* A valid class specifier should have been passed in. */ for (n = 0; n < md->pmd_nclass; n++) if (md->pmd_classdep[n].pcd_class == pa.pm_class) break; if (n == md->pmd_nclass) { error = EINVAL; break; } /* The requested PMC capabilities should be feasible. */ if ((md->pmd_classdep[n].pcd_caps & caps) != caps) { error = EOPNOTSUPP; break; } PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", pa.pm_ev, caps, mode, cpu); pmc = pmc_allocate_pmc_descriptor(); pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, PMC_ID_INVALID); pmc->pm_event = pa.pm_ev; pmc->pm_state = PMC_STATE_FREE; pmc->pm_caps = caps; pmc->pm_flags = pa.pm_flags; /* switch thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); #define PMC_IS_SHAREABLE_PMC(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ PMC_PHW_FLAG_IS_SHAREABLE) #define PMC_IS_UNALLOCATED(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) if (PMC_IS_SYSTEM_MODE(mode)) { pmc_select_cpu(cpu); for (n = 0; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, cpu) == 0 && (PMC_IS_UNALLOCATED(cpu, n) || PMC_IS_SHAREABLE_PMC(cpu, n)) && pcd->pcd_allocate_pmc(cpu, adjri, pmc, &pa) == 0) break; } } else { /* Process virtual mode */ for (n = 0; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, PMC_CPU_ANY) == 0 && pcd->pcd_allocate_pmc(curthread->td_oncpu, adjri, pmc, &pa) == 0) break; } } #undef PMC_IS_UNALLOCATED #undef PMC_IS_SHAREABLE_PMC pmc_restore_cpu_binding(&pb); if (n == (int) md->pmd_npmc) { pmc_destroy_pmc_descriptor(pmc); pmc = NULL; error = EINVAL; break; } /* Fill in the correct value in the ID field */ pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); /* Process mode PMCs with logging enabled need log files */ if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* All system mode sampling PMCs require a log file */ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* * Configure global pmc's immediately */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; pcd = pmc_ri_to_classdep(md, n, &adjri); if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { (void) pcd->pcd_release_pmc(cpu, adjri, pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; pmc_restore_cpu_binding(&pb); error = EPERM; break; } pmc_restore_cpu_binding(&pb); } pmc->pm_state = PMC_STATE_ALLOCATED; /* * mark row disposition */ if (PMC_IS_SYSTEM_MODE(mode)) PMC_MARK_ROW_STANDALONE(n); else PMC_MARK_ROW_THREAD(n); /* * Register this PMC with the current thread as its owner. */ if ((error = pmc_register_owner(curthread->td_proc, pmc)) != 0) { pmc_release_pmc_descriptor(pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; break; } /* * Return the allocated index. */ pa.pm_pmcid = pmc->pm_id; error = copyout(&pa, arg, sizeof(pa)); } break; /* * Attach a PMC to a process. */ case PMC_OP_PMCATTACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { error = EINVAL; break; } /* PMCs may be (re)attached only when allocated or stopped */ if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } else if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED) { error = EINVAL; break; } /* lookup pid */ if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Ignore processes that are working on exiting. */ if (p->p_flag & P_WEXIT) { error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ break; } /* * we are allowed to attach a PMC to a process if * we can debug it. */ error = p_candebug(curthread, p); PROC_UNLOCK(p); if (error == 0) error = pmc_attach_process(p, pm); } break; /* * Detach an attached PMC from a process. */ case PMC_OP_PMCDETACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Treat processes that are in the process of exiting * as if they were not present. */ if (p->p_flag & P_WEXIT) error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ if (error == 0) error = pmc_detach_process(p, pm); } break; /* * Retrieve the MSR number associated with the counter * 'pmc_id'. This allows processes to directly use RDPMC * instructions to read their PMCs, without the overhead of a * system call. */ case PMC_OP_PMCGETMSR: { int adjri, ri; struct pmc *pm; struct pmc_target *pt; struct pmc_op_getmsr gm; struct pmc_classdep *pcd; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &gm, sizeof(gm))) != 0) break; if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) break; /* * The allocated PMC has to be a process virtual PMC, * i.e., of type MODE_T[CS]. Global PMCs can only be * read using the PMCREAD operation since they may be * allocated on a different CPU than the one we could * be running on at the time of the RDPMC instruction. * * The GETMSR operation is not allowed for PMCs that * are inherited across processes. */ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || (pm->pm_flags & PMC_F_DESCENDANTS)) { error = EINVAL; break; } /* * It only makes sense to use a RDPMC (or its * equivalent instruction on non-x86 architectures) on * a process that has allocated and attached a PMC to * itself. Conversely the PMC is only allowed to have * one process attached to it -- its owner. */ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || LIST_NEXT(pt, pt_next) != NULL || pt->pt_process->pp_proc != pm->pm_owner->po_owner) { error = EINVAL; break; } ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); /* PMC class has no 'GETMSR' support */ if (pcd->pcd_get_msr == NULL) { error = ENOSYS; break; } if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) break; if ((error = copyout(&gm, arg, sizeof(gm))) < 0) break; /* * Mark our process as using MSRs. Update machine * state using a forced context switch. */ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; pmc_force_context_switch(); } break; /* * Release an allocated PMC */ case PMC_OP_PMCRELEASE: { pmc_id_t pmcid; struct pmc *pm; struct pmc_owner *po; struct pmc_op_simple sp; /* * Find PMC pointer for the named PMC. * * Use pmc_release_pmc_descriptor() to switch off the * PMC, remove all its target threads, and remove the * PMC from its owner's list. * * Remove the owner record if this is the last PMC * owned. * * Free up space. */ if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; po = pm->pm_owner; pmc_release_pmc_descriptor(pm); pmc_maybe_remove_owner(po); pmc_destroy_pmc_descriptor(pm); } break; /* * Read and/or write a PMC. */ case PMC_OP_PMCRW: { int adjri; struct pmc *pm; uint32_t cpu, ri; pmc_value_t oldvalue; struct pmc_binding pb; struct pmc_op_pmcrw prw; struct pmc_classdep *pcd; struct pmc_op_pmcrw *pprw; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &prw, sizeof(prw))) != 0) break; ri = 0; PMCDBG(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, prw.pm_flags); /* must have at least one flag set */ if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { error = EINVAL; break; } /* locate pmc descriptor */ if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) break; /* Can't read a PMC that hasn't been started. */ if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } /* writing a new value is allowed only for 'STOPPED' pmcs */ if (pm->pm_state == PMC_STATE_RUNNING && (prw.pm_flags & PMC_F_NEWVALUE)) { error = EBUSY; break; } if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { /* * If this PMC is attached to its owner (i.e., * the process requesting this operation) and * is running, then attempt to get an * upto-date reading from hardware for a READ. * Writes are only allowed when the PMC is * stopped, so only update the saved value * field. * * If the PMC is not running, or is not * attached to its owner, read/write to the * savedvalue field. */ ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mtx_pool_lock_spin(pmc_mtxpool, pm); cpu = curthread->td_oncpu; if (prw.pm_flags & PMC_F_OLDVALUE) { if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && (pm->pm_state == PMC_STATE_RUNNING)) error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue); else oldvalue = pm->pm_gv.pm_savedvalue; } if (prw.pm_flags & PMC_F_NEWVALUE) pm->pm_gv.pm_savedvalue = prw.pm_value; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* System mode PMCs */ cpu = PMC_TO_CPU(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* move this thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); critical_enter(); /* save old value */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue))) goto error; /* write out new value */ if (prw.pm_flags & PMC_F_NEWVALUE) error = (*pcd->pcd_write_pmc)(cpu, adjri, prw.pm_value); error: critical_exit(); pmc_restore_cpu_binding(&pb); if (error) break; } pprw = (struct pmc_op_pmcrw *) arg; -#ifdef DEBUG +#ifdef HWPMC_DEBUG if (prw.pm_flags & PMC_F_NEWVALUE) PMCDBG(PMC,OPS,2, "rw id=%d new %jx -> old %jx", ri, prw.pm_value, oldvalue); else if (prw.pm_flags & PMC_F_OLDVALUE) PMCDBG(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); #endif /* return old value if requested */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = copyout(&oldvalue, &pprw->pm_value, sizeof(prw.pm_value)))) break; } break; /* * Set the sampling rate for a sampling mode PMC and the * initial count for a counting mode PMC. */ case PMC_OP_PMCSETCOUNT: { struct pmc *pm; struct pmc_op_pmcsetcount sc; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sc, sizeof(sc))) != 0) break; if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) break; if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pm->pm_sc.pm_reloadcount = sc.pm_count; else pm->pm_sc.pm_initial = sc.pm_count; } break; /* * Start a PMC. */ case PMC_OP_PMCSTART: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmcid %x != id %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ break; else if (pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_ALLOCATED) { error = EINVAL; break; } error = pmc_start(pm); } break; /* * Stop a PMC. */ case PMC_OP_PMCSTOP: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; /* * Mark the PMC as inactive and invoke the MD stop * routines if needed. */ if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ break; else if (pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } error = pmc_stop(pm); } break; /* * Write a user supplied value to the log file. */ case PMC_OP_WRITELOG: { struct pmc_op_writelog wl; struct pmc_owner *po; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &wl, sizeof(wl))) != 0) break; if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; break; } error = pmclog_process_userlog(po, &wl); } break; default: error = EINVAL; break; } if (is_sx_locked != 0) { if (is_sx_downgraded) sx_sunlock(&pmc_sx); else sx_xunlock(&pmc_sx); } if (error) atomic_add_int(&pmc_stats.pm_syscall_errors, 1); PICKUP_GIANT(); return error; } /* * Helper functions */ /* * Mark the thread as needing callchain capture and post an AST. The * actual callchain capture will be done in a context where it is safe * to take page faults. */ static void pmc_post_callchain_callback(void) { struct thread *td; td = curthread; /* * If there is multiple PMCs for the same interrupt ignore new post */ if (td->td_pflags & TDP_CALLCHAIN) return; /* * Mark this thread as needing callchain capture. * `td->td_pflags' will be safe to touch because this thread * was in user space when it was interrupted. */ td->td_pflags |= TDP_CALLCHAIN; /* * Don't let this thread migrate between CPUs until callchain * capture completes. */ sched_pin(); return; } /* * Interrupt processing. * * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ int pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace) { int error, callchaindepth; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; error = 0; /* * Allocate space for a sample buffer. */ psb = pmc_pcpu[cpu]->pc_sb[ring]; ps = psb->ps_write; if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ pm->pm_stalled = 1; atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1); PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); error = ENOMEM; goto done; } /* Fill in entry. */ PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); KASSERT(pm->pm_runcount >= 0, ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm, pm->pm_runcount)); atomic_add_rel_int(&pm->pm_runcount, 1); /* hold onto PMC */ ps->ps_pmc = pm; if ((td = curthread) && td->td_proc) ps->ps_pid = td->td_proc->p_pid; else ps->ps_pid = -1; ps->ps_cpu = cpu; ps->ps_td = td; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; if (callchaindepth == 1) ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); else { /* * Kernel stack traversals can be done immediately, * while we defer to an AST for user space traversals. */ if (!inuserspace) { callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, callchaindepth, tf); } else { pmc_post_callchain_callback(); callchaindepth = PMC_SAMPLE_INUSE; } } ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; if (ps == psb->ps_fence) psb->ps_write = psb->ps_samples; else psb->ps_write = ps; done: /* mark CPU as needing processing */ CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets * rescheduled. */ static void pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) { int i; struct pmc *pm; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; #ifdef INVARIANTS int ncallchains; #endif psb = pmc_pcpu[cpu]->pc_sb[ring]; td = curthread; KASSERT(td->td_pflags & TDP_CALLCHAIN, ("[pmc,%d] Retrieving callchain for thread that doesn't want it", __LINE__)); #ifdef INVARIANTS ncallchains = 0; #endif /* * Iterate through all deferred callchain requests. */ ps = psb->ps_samples; for (i = 0; i < pmc_nsamples; i++, ps++) { if (ps->ps_nsamples != PMC_SAMPLE_INUSE) continue; if (ps->ps_td != td) continue; KASSERT(ps->ps_cpu == cpu, ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, ps->ps_cpu, PCPU_GET(cpuid))); pm = ps->ps_pmc; KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, ("[pmc,%d] Retrieving callchain for PMC that doesn't " "want it", __LINE__)); KASSERT(pm->pm_runcount > 0, ("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount)); /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. */ ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, pmc_callchaindepth, tf); #ifdef INVARIANTS ncallchains++; #endif } KASSERT(ncallchains > 0, ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, cpu)); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ return; } /* * Process saved PC samples. */ static void pmc_process_samples(int cpu, int ring) { struct pmc *pm; int adjri, n; struct thread *td; struct pmc_owner *po; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *psb; KASSERT(PCPU_GET(cpuid) == cpu, ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, PCPU_GET(cpuid), cpu)); psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) break; pm = ps->ps_pmc; KASSERT(pm->pm_runcount > 0, ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm, pm->pm_runcount)); po = pm->pm_owner; KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, pm, PMC_TO_MODE(pm))); /* Ignore PMCs that have been switched off */ if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; /* If there is a pending AST wait for completion */ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, pm, ps->ps_nsamples, ps->ps_flags, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); /* * If this is a process-mode PMC that is attached to * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); addupc_intr(td, ps->ps_pc[0], 1); } goto entrydone; } /* * Otherwise, this is either a sampling mode PMC that * is attached to a different process than its owner, * or a system-wide sampling PMC. Dispatch a log * entry to the PMC's owner process. */ pmclog_process_callchain(pm, ps); entrydone: ps->ps_nsamples = 0; /* mark entry as free */ atomic_subtract_rel_int(&pm->pm_runcount, 1); /* increment read pointer, modulo sample size */ if (++ps == psb->ps_fence) psb->ps_read = psb->ps_samples; else psb->ps_read = ps; } atomic_add_int(&pmc_stats.pm_log_sweeps, 1); /* Do not re-enable stalled PMCs if we failed to process any samples */ if (n == 0) return; /* * Restart any stalled sampling PMCs on this CPU. * * If the NMI handler sets the pm_stalled field of a PMC after * the check below, we'll end up processing the stalled PMC at * the next hardclock tick. */ for (n = 0; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); (void) (*pcd->pcd_get_config)(cpu,adjri,&pm); if (pm == NULL || /* !cfg'ed */ pm->pm_state != PMC_STATE_RUNNING || /* !active */ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ pm->pm_stalled == 0) /* !stalled */ continue; pm->pm_stalled = 0; (*pcd->pcd_start_pmc)(cpu, adjri); } } /* * Event handlers. */ /* * Handle a process exit. * * Remove this process from all hash tables. If this process * owned any PMCs, turn off those PMCs and deallocate them, * removing any associations with target processes. * * This function will be called by the last 'thread' of a * process. * * XXX This eventhandler gets called early in the exit process. * Consider using a 'hook' invocation from thread_exit() or equivalent * spot. Another negative is that kse_exit doesn't seem to call * exit1() [??]. * */ static void pmc_process_exit(void *arg __unused, struct proc *p) { struct pmc *pm; int adjri, cpu; unsigned int ri; int is_using_hwpmcs; struct pmc_owner *po; struct pmc_process *pp; struct pmc_classdep *pcd; pmc_value_t newvalue, tmp; PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); /* * Log a sysexit event to all SS PMC owners. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_sysexit(po, p->p_pid); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); /* * Since this code is invoked by the last thread in an exiting * process, we would have context switched IN at some prior * point. However, with PREEMPTION, kernel mode context * switches may happen any time, so we want to disable a * context switch OUT till we get any PMCs targetting this * process off the hardware. * * We also need to atomically remove this process' * entry from our target process hash table, using * PMC_FLAG_REMOVE. */ PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); critical_enter(); /* no preemption */ cpu = curthread->td_oncpu; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_REMOVE)) != NULL) { PMCDBG(PRC,EXT,2, "process-exit proc=%p pmc-process=%p", p, pp); /* * The exiting process could the target of * some PMCs which will be running on * currently executing CPU. * * We need to turn these PMCs off like we * would do at context switch OUT time. */ for (ri = 0; ri < md->pmd_npmc; ri++) { /* * Pick up the pmc pointer from hardware * state similar to the CSW_OUT code. */ pm = NULL; pcd = pmc_ri_to_classdep(md, ri, &adjri); (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm); if (pm == NULL || !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) continue; PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, pm, pm->pm_state); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); (void) pcd->pcd_stop_pmc(cpu, adjri); KASSERT(pm->pm_runcount > 0, ("[pmc,%d] bad runcount ri %d rc %d", __LINE__, ri, pm->pm_runcount)); /* Stop hardware only if it is actually running */ if (pm->pm_state == PMC_STATE_RUNNING && pm->pm_stalled == 0) { pcd->pcd_read_pmc(cpu, adjri, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); } atomic_subtract_rel_int(&pm->pm_runcount,1); KASSERT((int) pm->pm_runcount >= 0, ("[pmc,%d] runcount is %d", __LINE__, ri)); (void) pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Inform the MD layer of this pseudo "context switch * out" */ (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); critical_exit(); /* ok to be pre-empted now */ /* * Unlink this process from the PMCs that are * targetting it. This will send a signal to * all PMC owner's whose PMCs are orphaned. * * Log PMC value at exit time if requested. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) pmclog_process_procexit(pm, pp); pmc_unlink_target_process(pm, pp); } free(pp, M_PMC); } else critical_exit(); /* pp == NULL */ /* * If the process owned PMCs, free them up and free up * memory. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } sx_xunlock(&pmc_sx); } /* * Handle a process fork. * * If the parent process 'p1' is under HWPMC monitoring, then copy * over any attached PMCs that have 'do_descendants' semantics. */ static void pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, int flags) { int is_using_hwpmcs; unsigned int ri; uint32_t do_descendants; struct pmc *pm; struct pmc_owner *po; struct pmc_process *ppnew, *ppold; (void) flags; /* unused parameter */ PROC_LOCK(p1); is_using_hwpmcs = p1->p_flag & P_HWPMC; PROC_UNLOCK(p1); /* * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, p1->p_pid, p1->p_comm, newproc); /* * If the parent process (curthread->td_proc) is a * target of any PMCs, look for PMCs that are to be * inherited, and link these into the new process * descriptor. */ if ((ppold = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) goto done; /* nothing to do */ do_descendants = 0; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; if (do_descendants == 0) /* nothing to do */ goto done; /* allocate a descriptor for the new process */ if ((ppnew = pmc_find_process_descriptor(newproc, PMC_FLAG_ALLOCATE)) == NULL) goto done; /* * Run through all PMCs that were targeting the old process * and which specified F_DESCENDANTS and attach them to the * new process. * * Log the fork event to all owners of PMCs attached to this * process, if not already logged. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS)) { pmc_link_target_process(pm, ppnew); po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); } /* * Now mark the new process as being tracked by this driver. */ PROC_LOCK(newproc); newproc->p_flag |= P_HWPMC; PROC_UNLOCK(newproc); done: sx_xunlock(&pmc_sx); } static void pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; sx_slock(&pmc_sx); /* * Notify owners of system sampling PMCs about KLD operations. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); /* * TODO: Notify owners of (all) process-sampling PMCs too. */ sx_sunlock(&pmc_sx); } static void pmc_kld_unload(void *arg __unused, const char *filename __unused, caddr_t address, size_t size) { struct pmc_owner *po; sx_slock(&pmc_sx); LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); /* * TODO: Notify owners of process-sampling PMCs. */ sx_sunlock(&pmc_sx); } /* * initialization */ static const char *pmc_name_of_pmcclass[] = { #undef __PMC_CLASS #define __PMC_CLASS(N) #N , __PMC_CLASSES() }; /* * Base class initializer: allocate structure and set default classes. */ struct pmc_mdep * pmc_mdep_alloc(int nclasses) { struct pmc_mdep *md; int n; /* SOFT + md classes */ n = 1 + nclasses; md = malloc(sizeof(struct pmc_mdep) + n * sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO); md->pmd_nclass = n; /* Add base class. */ pmc_soft_initialize(md); return md; } void pmc_mdep_free(struct pmc_mdep *md) { pmc_soft_finalize(md); free(md, M_PMC); } static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static struct pmc_mdep * pmc_generic_cpu_initialize(void) { struct pmc_mdep *md; md = pmc_mdep_alloc(0); md->pmd_cputype = PMC_CPU_GENERIC; md->pmd_pcpu_init = NULL; md->pmd_pcpu_fini = NULL; md->pmd_switch_in = generic_switch_in; md->pmd_switch_out = generic_switch_out; return (md); } static void pmc_generic_cpu_finalize(struct pmc_mdep *md) { (void) md; } static int pmc_initialize(void) { int c, cpu, error, n, ri; unsigned int maxcpu; struct pmc_binding pb; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *sb; md = NULL; error = 0; -#ifdef DEBUG +#ifdef HWPMC_DEBUG /* parse debug flags first */ if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr))) pmc_debugflags_parse(pmc_debugstr, pmc_debugstr+strlen(pmc_debugstr)); #endif PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); /* check kernel version */ if (pmc_kernel_version != PMC_VERSION) { if (pmc_kernel_version == 0) printf("hwpmc: this kernel has not been compiled with " "'options HWPMC_HOOKS'.\n"); else printf("hwpmc: kernel version (0x%x) does not match " "module version (0x%x).\n", pmc_kernel_version, PMC_VERSION); return EPROGMISMATCH; } /* * check sysctl parameters */ if (pmc_hashsize <= 0) { (void) printf("hwpmc: tunable \"hashsize\"=%d must be " "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { (void) printf("hwpmc: tunable \"nsamples\"=%d out of " "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } if (pmc_callchaindepth <= 0 || pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " "range - using %d.\n", pmc_callchaindepth, PMC_CALLCHAIN_DEPTH_MAX); pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; } md = pmc_md_initialize(); if (md == NULL) { /* Default to generic CPU. */ md = pmc_generic_cpu_initialize(); if (md == NULL) return (ENOSYS); } KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); /* Compute the map from row-indices to classdep pointers. */ pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); for (n = 0; n < md->pmd_npmc; n++) pmc_rowindex_to_classdep[n] = NULL; for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; for (n = 0; n < pcd->pcd_num; n++, ri++) pmc_rowindex_to_classdep[ri] = pcd; } KASSERT(ri == md->pmd_npmc, ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, ri, md->pmd_npmc)); maxcpu = pmc_cpu_max(); /* allocate space for the per-cpu array */ pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, M_WAITOK|M_ZERO); /* per-cpu 'saved values' for managing process-mode PMCs */ pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK); /* Perform CPU-dependent initialization. */ pmc_save_cpu_binding(&pb); error = 0; for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pmc_select_cpu(cpu); pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, M_WAITOK|M_ZERO); if (md->pmd_pcpu_init) error = md->pmd_pcpu_init(md, cpu); for (n = 0; error == 0 && n < md->pmd_nclass; n++) error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); } pmc_restore_cpu_binding(&pb); if (error) return (error); /* allocate space for the sample array */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; sb = malloc(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; sb = malloc(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; } /* allocate space for the row disposition array */ pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); /* mark all PMCs as available */ for (n = 0; n < (int) md->pmd_npmc; n++) PMC_MARK_ROW_FREE(n); /* allocate thread hash tables */ pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, &pmc_ownerhashmask); pmc_processhash = hashinit(pmc_hashsize, M_PMC, &pmc_processhashmask); mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", MTX_SPIN); LIST_INIT(&pmc_ss_owners); pmc_ss_count = 0; /* allocate a pool of spin mutexes */ pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, MTX_SPIN); PMCDBG(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, pmc_processhash, pmc_processhashmask); /* register process {exit,fork,exec} handlers */ pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); /* register kld event handlers */ pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, NULL, EVENTHANDLER_PRI_ANY); pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, NULL, EVENTHANDLER_PRI_ANY); /* initialize logging */ pmclog_initialize(); /* set hook functions */ pmc_intr = md->pmd_intr; pmc_hook = pmc_hook_handler; if (error == 0) { printf(PMC_MODULE_NAME ":"); for (n = 0; n < (int) md->pmd_nclass; n++) { pcd = &md->pmd_classdep[n]; printf(" %s/%d/%d/0x%b", pmc_name_of_pmcclass[pcd->pcd_class], pcd->pcd_num, pcd->pcd_width, pcd->pcd_caps, "\20" "\1INT\2USR\3SYS\4EDG\5THR" "\6REA\7WRI\10INV\11QUA\12PRC" "\13TAG\14CSC"); } printf("\n"); } return (error); } /* prepare to be unloaded */ static void pmc_cleanup(void) { int c, cpu; unsigned int maxcpu; struct pmc_ownerhash *ph; struct pmc_owner *po, *tmp; struct pmc_binding pb; -#ifdef DEBUG +#ifdef HWPMC_DEBUG struct pmc_processhash *prh; #endif PMCDBG(MOD,INI,0, "%s", "cleanup"); /* switch off sampling */ CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); if (pmc_hook == NULL) { /* being unloaded already */ sx_xunlock(&pmc_sx); return; } pmc_hook = NULL; /* prevent new threads from entering module */ /* deregister event handlers */ EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); /* send SIGBUS to all owner threads, free up allocations */ if (pmc_ownerhash) for (ph = pmc_ownerhash; ph <= &pmc_ownerhash[pmc_ownerhashmask]; ph++) { LIST_FOREACH_SAFE(po, ph, po_next, tmp) { pmc_remove_owner(po); /* send SIGBUS to owner processes */ PMCDBG(MOD,INI,2, "cleanup signal proc=%p " "(%d, %s)", po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); PROC_LOCK(po->po_owner); kern_psignal(po->po_owner, SIGBUS); PROC_UNLOCK(po->po_owner); pmc_destroy_owner_descriptor(po); } } /* reclaim allocated data structures */ if (pmc_mtxpool) mtx_pool_destroy(&pmc_mtxpool); mtx_destroy(&pmc_processhash_mtx); if (pmc_processhash) { -#ifdef DEBUG +#ifdef HWPMC_DEBUG struct pmc_process *pp; PMCDBG(MOD,INI,3, "%s", "destroy process hash"); for (prh = pmc_processhash; prh <= &pmc_processhash[pmc_processhashmask]; prh++) LIST_FOREACH(pp, prh, pp_next) PMCDBG(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); #endif hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); pmc_processhash = NULL; } if (pmc_ownerhash) { PMCDBG(MOD,INI,3, "%s", "destroy owner hash"); hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); pmc_ownerhash = NULL; } KASSERT(LIST_EMPTY(&pmc_ss_owners), ("[pmc,%d] Global SS owner list not empty", __LINE__)); KASSERT(pmc_ss_count == 0, ("[pmc,%d] Global SS count not empty", __LINE__)); /* do processor and pmc-class dependent cleanup */ maxcpu = pmc_cpu_max(); PMCDBG(MOD,INI,3, "%s", "md cleanup"); if (md) { pmc_save_cpu_binding(&pb); for (cpu = 0; cpu < maxcpu; cpu++) { PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", cpu, pmc_pcpu[cpu]); if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) continue; pmc_select_cpu(cpu); for (c = 0; c < md->pmd_nclass; c++) md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); if (md->pmd_pcpu_fini) md->pmd_pcpu_fini(md, cpu); } if (md->pmd_cputype == PMC_CPU_GENERIC) pmc_generic_cpu_finalize(md); else pmc_md_finalize(md); pmc_mdep_free(md); md = NULL; pmc_restore_cpu_binding(&pb); } /* Free per-cpu descriptors. */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); free(pmc_pcpu[cpu], M_PMC); } free(pmc_pcpu, M_PMC); pmc_pcpu = NULL; free(pmc_pcpu_saved, M_PMC); pmc_pcpu_saved = NULL; if (pmc_pmcdisp) { free(pmc_pmcdisp, M_PMC); pmc_pmcdisp = NULL; } if (pmc_rowindex_to_classdep) { free(pmc_rowindex_to_classdep, M_PMC); pmc_rowindex_to_classdep = NULL; } pmclog_shutdown(); sx_xunlock(&pmc_sx); /* we are done */ } /* * The function called at load/unload. */ static int load (struct module *module __unused, int cmd, void *arg __unused) { int error; error = 0; switch (cmd) { case MOD_LOAD : /* initialize the subsystem */ error = pmc_initialize(); if (error != 0) break; PMCDBG(MOD,INI,1, "syscall=%d maxcpu=%d", pmc_syscall_num, pmc_cpu_max()); break; case MOD_UNLOAD : case MOD_SHUTDOWN: pmc_cleanup(); PMCDBG(MOD,INI,1, "%s", "unloaded"); break; default : error = EINVAL; /* XXX should panic(9) */ break; } return error; } /* memory pool */ MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); Index: head/sys/sys/pmc.h =================================================================== --- head/sys/sys/pmc.h (revision 282640) +++ head/sys/sys/pmc.h (revision 282641) @@ -1,1113 +1,1113 @@ /*- * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ #define PMC_VERSION_MAJOR 0x03 #define PMC_VERSION_MINOR 0x01 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. */ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(ARMV7, 0x500, "ARMv7") \ __PMC_CPU(INTEL_P5, 0x80, "Intel Pentium") \ __PMC_CPU(INTEL_P6, 0x81, "Intel Pentium Pro") \ __PMC_CPU(INTEL_CL, 0x82, "Intel Celeron") \ __PMC_CPU(INTEL_PII, 0x83, "Intel Pentium II") \ __PMC_CPU(INTEL_PIII, 0x84, "Intel Pentium III") \ __PMC_CPU(INTEL_PM, 0x85, "Intel Pentium M") \ __PMC_CPU(INTEL_PIV, 0x86, "Intel Pentium IV") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_MPC85XX, 0x340, "Freescale PowerPC MPC85XX") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(GENERIC, 0x400, "Generic") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_GENERIC /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC) /* CPU Timestamp counter */ \ __PMC_CLASS(K7) /* AMD K7 performance counters */ \ __PMC_CLASS(K8) /* AMD K8 performance counters */ \ __PMC_CLASS(P5) /* Intel Pentium counters */ \ __PMC_CLASS(P6) /* Intel Pentium Pro counters */ \ __PMC_CLASS(P4) /* Intel Pentium-IV counters */ \ __PMC_CLASS(IAF) /* Intel Core2/Atom, fixed function */ \ __PMC_CLASS(IAP) /* Intel Core...Atom, programmable */ \ __PMC_CLASS(UCF) /* Intel Uncore fixed function */ \ __PMC_CLASS(UCP) /* Intel Uncore programmable */ \ __PMC_CLASS(XSCALE) /* Intel XScale counters */ \ __PMC_CLASS(ARMV7) /* ARMv7 */ \ __PMC_CLASS(MIPS24K) /* MIPS 24K */ \ __PMC_CLASS(OCTEON) /* Cavium Octeon */ \ __PMC_CLASS(MIPS74K) /* MIPS 74K */ \ __PMC_CLASS(PPC7450) /* Motorola MPC7450 class */ \ __PMC_CLASS(PPC970) /* IBM PowerPC 970 class */ \ __PMC_CLASS(E500) /* Freescale e500 class */ \ __PMC_CLASS(SOFT) /* Software events */ enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(N) PMC_CLASS_##N , __PMC_CLASSES() }; #define PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_SOFT /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * a interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system. */ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparision sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_CASCADE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. */ #define PMC_F_FORCE 0x00000001 /*OP ADMIN force operation */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ #define PMC_F_KGMON 0x00000040 /*OP ALLOCATE kgmon(8) profiling */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 128 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +--------+----------+-----------+-----------+ * | CPU | PMC MODE | PMC CLASS | ROW INDEX | * +--------+----------+-----------+-----------+ * * where each field is 8 bits wide. Field 'CPU' is set to the * requested CPU for system-wide PMCs or PMC_CPU_ANY for process-mode * PMCs. Field 'PMC MODE' is the allocated PMC mode. Field 'PMC * CLASS' is the class of the PMC. Field 'ROW INDEX' is the row index * for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xFF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xFF0000) >> 16) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFF000000) >> 24) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFF) << 24) | (((MODE) & 0xFF) << 16) | \ (((CLASS) & 0xFF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ struct pmc_op_getdriverstats { int pm_intr_ignored; /* #interrupts ignored */ int pm_intr_processed; /* #interrupts processed */ int pm_intr_bufferfull; /* #interrupts with ENOSPC */ int pm_syscalls; /* #syscalls */ int pm_syscall_errors; /* #syscalls with errors */ int pm_buffer_requests; /* #buffer requests */ int pm_buffer_requests_failed; /* #failed buffer requests */ int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address assoicated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. */ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 4 #define PMC_NLOGBUFFERS 1024 #define PMC_NSAMPLES 1024 #define PMC_CALLCHAIN_DEPTH 32 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific s1tuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * get descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; uint32_t pm_stalled; /* marks stalled sampling PMCs */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ int pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC. */ pmc_id_t pm_id; /* allocated PMC id */ /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target thread */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct pmclog_buffer *po_curbuf; /* current log buffer */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). * */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint8_t ps_cpu; /* cpu number */ uint8_t ps_flags; /* other flags */ pid_t ps_pid; /* process PID or -1 */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF) struct pmc_samplebuffer { struct pmc_sample * volatile ps_read; /* read pointer */ struct pmc_sample * volatile ps_write; /* write pointer */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample *ps_fence; /* one beyond ps_samples[] */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[2]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. */ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri); int (*pcd_stop_pmc)(int _cpu, int _ri); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* per-cpu initialization and finalization */ int (*pmd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pmd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(int _cpu, struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. */ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_op_getdriverstats pmc_stats; -#if defined(DEBUG) +#if defined(HWPMC_DEBUG) /* debug flags, major flag groups */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG(M,N,L,F,...) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ printf(#M ":" #N ":" #L ": " F "\n", __VA_ARGS__); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... */ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define PMCDBG(M,N,L,F,...) /* nothing */ #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _cpu, int _soft, struct pmc *_pm, struct trapframe *_tf, int _inuserspace); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */