Index: stable/8/lib/libpmc/Makefile
===================================================================
--- stable/8/lib/libpmc/Makefile	(revision 206701)
+++ stable/8/lib/libpmc/Makefile	(revision 206702)
@@ -1,65 +1,70 @@
 # $FreeBSD$
 
 LIB=	pmc
 
 SRCS=	libpmc.c pmclog.c
 INCS=	pmc.h pmclog.h
 
 WARNS?=	6
 
 MAN=	pmc.3
 MAN+=	pmc_allocate.3
 MAN+=	pmc_attach.3
 MAN+=	pmc_capabilities.3
 MAN+=	pmc_configure_logfile.3
 MAN+=	pmc_disable.3
 MAN+=	pmc_event_names_of_class.3
 MAN+=	pmc_get_driver_stats.3
 MAN+=	pmc_get_msr.3
 MAN+=	pmc_init.3
 MAN+=	pmc_name_of_capability.3
 MAN+=	pmc_read.3
 MAN+=	pmc_set.3
 MAN+=	pmc_start.3
 MAN+=	pmclog.3
 
 # PMC-dependent manual pages
 MAN+=	pmc.atom.3
 MAN+=	pmc.core.3
 MAN+=	pmc.core2.3
 MAN+=	pmc.iaf.3
+MAN+=	pmc.ucf.3
 MAN+=	pmc.k7.3
 MAN+=	pmc.k8.3
 MAN+=	pmc.p4.3
 MAN+=	pmc.p5.3
 MAN+=	pmc.p6.3
+MAN+=	pmc.corei7.3
+MAN+=	pmc.corei7uc.3
+MAN+=	pmc.westmere.3
+MAN+=	pmc.westmereuc.3
 MAN+=	pmc.tsc.3
 
 MLINKS+= \
 	pmc_allocate.3 pmc_release.3 \
 	pmc_attach.3 pmc_detach.3 \
 	pmc_capabilities.3 pmc_ncpu.3 \
 	pmc_capabilities.3 pmc_npmc.3 \
 	pmc_capabilities.3 pmc_pmcinfo.3 \
 	pmc_capabilities.3 pmc_cpuinfo.3 \
 	pmc_capabilities.3 pmc_width.3 \
 	pmc_configure_logfile.3 pmc_flush_logfile.3 \
 	pmc_configure_logfile.3 pmc_writelog.3 \
 	pmc_disable.3 pmc_enable.3 \
 	pmc_name_of_capability.3 pmc_name_of_class.3 \
 	pmc_name_of_capability.3 pmc_name_of_cputype.3 \
 	pmc_name_of_capability.3 pmc_name_of_disposition.3 \
 	pmc_name_of_capability.3 pmc_name_of_event.3 \
 	pmc_name_of_capability.3 pmc_name_of_mode.3 \
 	pmc_name_of_capability.3 pmc_name_of_state.3 \
 	pmc_read.3 pmc_rw.3 \
 	pmc_read.3 pmc_write.3 \
 	pmc_start.3 pmc_stop.3
 
 MLINKS+= \
 	pmclog.3 pmclog_open.3 \
 	pmclog.3 pmclog_close.3 \
 	pmclog.3 pmclog_feed.3 \
 	pmclog.3 pmclog_read.3
 
 .include <bsd.lib.mk>
Index: stable/8/lib/libpmc/libpmc.c
===================================================================
--- stable/8/lib/libpmc/libpmc.c	(revision 206701)
+++ stable/8/lib/libpmc/libpmc.c	(revision 206702)
@@ -1,2845 +1,3012 @@
 /*-
  * Copyright (c) 2003-2008 Joseph Koshy
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/module.h>
 #include <sys/pmc.h>
 #include <sys/syscall.h>
 
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <pmc.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
 #include <unistd.h>
 
 #include "libpmcinternal.h"
 
 /* Function prototypes */
 #if defined(__i386__)
 static int k7_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 #endif
 #if defined(__amd64__) || defined(__i386__)
 static int iaf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 static int iap_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
+static int ucf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int ucp_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
 static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 #endif
 #if defined(__i386__)
 static int p5_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 #endif
 #if defined(__amd64__) || defined(__i386__)
 static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 #endif
 
 #define PMC_CALL(cmd, params)				\
 	syscall(pmc_syscall, PMC_OP_##cmd, (params))
 
 /*
  * Event aliases provide a way for the user to ask for generic events
  * like "cache-misses", or "instructions-retired".  These aliases are
  * mapped to the appropriate canonical event descriptions using a
  * lookup table.
  */
 struct pmc_event_alias {
 	const char	*pm_alias;
 	const char	*pm_spec;
 };
 
 static const struct pmc_event_alias *pmc_mdep_event_aliases;
 
 /*
  * The pmc_event_descr structure maps symbolic names known to the user
  * to integer codes used by the PMC KLD.
  */
 struct pmc_event_descr {
 	const char	*pm_ev_name;
 	enum pmc_event	pm_ev_code;
 };
 
 /*
  * The pmc_class_descr structure maps class name prefixes for
  * event names to event tables and other PMC class data.
  */
 struct pmc_class_descr {
 	const char	*pm_evc_name;
 	size_t		pm_evc_name_size;
 	enum pmc_class	pm_evc_class;
 	const struct pmc_event_descr *pm_evc_event_table;
 	size_t		pm_evc_event_table_size;
 	int		(*pm_evc_allocate_pmc)(enum pmc_event _pe,
 			    char *_ctrspec, struct pmc_op_pmcallocate *_pa);
 };
 
 #define	PMC_TABLE_SIZE(N)	(sizeof(N)/sizeof(N[0]))
 #define	PMC_EVENT_TABLE_SIZE(N)	PMC_TABLE_SIZE(N##_event_table)
 
 #undef	__PMC_EV
 #define	__PMC_EV(C,N) { #N, PMC_EV_ ## C ## _ ## N },
 
 /*
  * PMC_CLASSDEP_TABLE(NAME, CLASS)
  *
  * Define a table mapping event names and aliases to HWPMC event IDs.
  */
 #define	PMC_CLASSDEP_TABLE(N, C)				\
 	static const struct pmc_event_descr N##_event_table[] =	\
 	{							\
 		__PMC_EV_##C()					\
 	}
 
 PMC_CLASSDEP_TABLE(iaf, IAF);
 PMC_CLASSDEP_TABLE(k7, K7);
 PMC_CLASSDEP_TABLE(k8, K8);
 PMC_CLASSDEP_TABLE(p4, P4);
 PMC_CLASSDEP_TABLE(p5, P5);
 PMC_CLASSDEP_TABLE(p6, P6);
+PMC_CLASSDEP_TABLE(ucf, UCF);
 
 #undef	__PMC_EV_ALIAS
 #define	__PMC_EV_ALIAS(N,CODE) 	{ N, PMC_EV_##CODE },
 
 static const struct pmc_event_descr atom_event_table[] =
 {
 	__PMC_EV_ALIAS_ATOM()
 };
 
 static const struct pmc_event_descr core_event_table[] =
 {
 	__PMC_EV_ALIAS_CORE()
 };
 
 
 static const struct pmc_event_descr core2_event_table[] =
 {
 	__PMC_EV_ALIAS_CORE2()
 };
 
 static const struct pmc_event_descr corei7_event_table[] =
 {
 	__PMC_EV_ALIAS_COREI7()
 };
 
+static const struct pmc_event_descr westmere_event_table[] =
+{
+	__PMC_EV_ALIAS_WESTMERE()
+};
+
+static const struct pmc_event_descr corei7uc_event_table[] =
+{
+	__PMC_EV_ALIAS_COREI7UC()
+};
+
+static const struct pmc_event_descr westmereuc_event_table[] =
+{
+	__PMC_EV_ALIAS_WESTMEREUC()
+};
+
 /*
  * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...)
  *
  * Map a CPU to the PMC classes it supports.
  */
 #define	PMC_MDEP_TABLE(N,C,...)				\
 	static const enum pmc_class N##_pmc_classes[] = {	\
 		PMC_CLASS_##C, __VA_ARGS__			\
 	}
 
 PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(core, IAP, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
-PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(k7, K7, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(k8, K8, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(p4, P4, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(p5, P5, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(p6, P6, PMC_CLASS_TSC);
 
 static const struct pmc_event_descr tsc_event_table[] =
 {
 	__PMC_EV_TSC()
 };
 
 #undef	PMC_CLASS_TABLE_DESC
 #define	PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR)	\
 static const struct pmc_class_descr NAME##_class_table_descr =	\
 	{							\
 		.pm_evc_name  = #CLASS "-",			\
 		.pm_evc_name_size = sizeof(#CLASS "-") - 1,	\
 		.pm_evc_class = PMC_CLASS_##CLASS ,		\
 		.pm_evc_event_table = EVENTS##_event_table ,	\
 		.pm_evc_event_table_size = 			\
 			PMC_EVENT_TABLE_SIZE(EVENTS),		\
 		.pm_evc_allocate_pmc = ALLOCATOR##_allocate_pmc	\
 	}
 
 #if	defined(__i386__) || defined(__amd64__)
 PMC_CLASS_TABLE_DESC(iaf, IAF, iaf, iaf);
 PMC_CLASS_TABLE_DESC(atom, IAP, atom, iap);
 PMC_CLASS_TABLE_DESC(core, IAP, core, iap);
 PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap);
 PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
+PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
+PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
+PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp);
+PMC_CLASS_TABLE_DESC(westmereuc, UCP, westmereuc, ucp);
 #endif
 #if	defined(__i386__)
 PMC_CLASS_TABLE_DESC(k7, K7, k7, k7);
 #endif
 #if	defined(__i386__) || defined(__amd64__)
 PMC_CLASS_TABLE_DESC(k8, K8, k8, k8);
 PMC_CLASS_TABLE_DESC(p4, P4, p4, p4);
 #endif
 #if	defined(__i386__)
 PMC_CLASS_TABLE_DESC(p5, P5, p5, p5);
 PMC_CLASS_TABLE_DESC(p6, P6, p6, p6);
 #endif
 #if	defined(__i386__) || defined(__amd64__)
 PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc);
 #endif
 
 #undef	PMC_CLASS_TABLE_DESC
 
 static const struct pmc_class_descr **pmc_class_table;
 #define	PMC_CLASS_TABLE_SIZE	cpu_info.pm_nclass
 
 static const enum pmc_class *pmc_mdep_class_list;
 static size_t pmc_mdep_class_list_size;
 
 /*
  * Mapping tables, mapping enumeration values to human readable
  * strings.
  */
 
 static const char * pmc_capability_names[] = {
 #undef	__PMC_CAP
 #define	__PMC_CAP(N,V,D)	#N ,
 	__PMC_CAPS()
 };
 
 static const char * pmc_class_names[] = {
 #undef	__PMC_CLASS
 #define __PMC_CLASS(C)	#C ,
 	__PMC_CLASSES()
 };
 
 struct pmc_cputype_map {
 	enum pmc_class	pm_cputype;
 	const char	*pm_name;
 };
 
 static const struct pmc_cputype_map pmc_cputype_names[] = {
 #undef	__PMC_CPU
 #define	__PMC_CPU(S, V, D) { .pm_cputype = PMC_CPU_##S, .pm_name = #S } ,
 	__PMC_CPUS()
 };
 
 static const char * pmc_disposition_names[] = {
 #undef	__PMC_DISP
 #define	__PMC_DISP(D)	#D ,
 	__PMC_DISPOSITIONS()
 };
 
 static const char * pmc_mode_names[] = {
 #undef  __PMC_MODE
 #define __PMC_MODE(M,N)	#M ,
 	__PMC_MODES()
 };
 
 static const char * pmc_state_names[] = {
 #undef  __PMC_STATE
 #define __PMC_STATE(S) #S ,
 	__PMC_STATES()
 };
 
 static int pmc_syscall = -1;		/* filled in by pmc_init() */
 
 static struct pmc_cpuinfo cpu_info;	/* filled in by pmc_init() */
 
 /* Event masks for events */
 struct pmc_masks {
 	const char	*pm_name;
 	const uint32_t	pm_value;
 };
 #define	PMCMASK(N,V)	{ .pm_name = #N, .pm_value = (V) }
-#define	NULLMASK	PMCMASK(NULL,0)
+#define	NULLMASK	{ .pm_name = NULL }
 
 #if defined(__amd64__) || defined(__i386__)
 static int
 pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint32_t *evmask)
 {
 	const struct pmc_masks *pm;
 	char *q, *r;
 	int c;
 
 	if (pmask == NULL)	/* no mask keywords */
 		return (-1);
 	q = strchr(p, '=');	/* skip '=' */
 	if (*++q == '\0')	/* no more data */
 		return (-1);
 	c = 0;			/* count of mask keywords seen */
 	while ((r = strsep(&q, "+")) != NULL) {
 		for (pm = pmask; pm->pm_name && strcasecmp(r, pm->pm_name);
 		    pm++)
 			;
 		if (pm->pm_name == NULL) /* not found */
 			return (-1);
 		*evmask |= pm->pm_value;
 		c++;
 	}
 	return (c);
 }
 #endif
 
 #define	KWMATCH(p,kw)		(strcasecmp((p), (kw)) == 0)
 #define	KWPREFIXMATCH(p,kw)	(strncasecmp((p), (kw), sizeof((kw)) - 1) == 0)
 #define	EV_ALIAS(N,S)		{ .pm_alias = N, .pm_spec = S }
 
 #if defined(__i386__)
 
 /*
  * AMD K7 (Athlon) CPUs.
  */
 
 static struct pmc_event_alias k7_aliases[] = {
 	EV_ALIAS("branches",		"k7-retired-branches"),
 	EV_ALIAS("branch-mispredicts",	"k7-retired-branches-mispredicted"),
 	EV_ALIAS("cycles",		"tsc"),
 	EV_ALIAS("dc-misses",		"k7-dc-misses"),
 	EV_ALIAS("ic-misses",		"k7-ic-misses"),
 	EV_ALIAS("instructions",	"k7-retired-instructions"),
 	EV_ALIAS("interrupts",		"k7-hardware-interrupts"),
 	EV_ALIAS(NULL, NULL)
 };
 
 #define	K7_KW_COUNT	"count"
 #define	K7_KW_EDGE	"edge"
 #define	K7_KW_INV	"inv"
 #define	K7_KW_OS	"os"
 #define	K7_KW_UNITMASK	"unitmask"
 #define	K7_KW_USR	"usr"
 
 static int
 k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char		*e, *p, *q;
 	int		c, has_unitmask;
 	uint32_t	count, unitmask;
 
 	pmc_config->pm_md.pm_amd.pm_amd_config = 0;
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
 
 	if (pe == PMC_EV_K7_DC_REFILLS_FROM_L2 ||
 	    pe == PMC_EV_K7_DC_REFILLS_FROM_SYSTEM ||
 	    pe == PMC_EV_K7_DC_WRITEBACKS) {
 		has_unitmask = 1;
 		unitmask = AMD_PMC_UNITMASK_MOESI;
 	} else
 		unitmask = has_unitmask = 0;
 
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
 		if (KWPREFIXMATCH(p, K7_KW_COUNT "=")) {
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 
 			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
 			pmc_config->pm_md.pm_amd.pm_amd_config |=
 			    AMD_PMC_TO_COUNTER(count);
 
 		} else if (KWMATCH(p, K7_KW_EDGE)) {
 			pmc_config->pm_caps |= PMC_CAP_EDGE;
 		} else if (KWMATCH(p, K7_KW_INV)) {
 			pmc_config->pm_caps |= PMC_CAP_INVERT;
 		} else if (KWMATCH(p, K7_KW_OS)) {
 			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 		} else if (KWPREFIXMATCH(p, K7_KW_UNITMASK "=")) {
 			if (has_unitmask == 0)
 				return (-1);
 			unitmask = 0;
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			while ((c = tolower(*q++)) != 0)
 				if (c == 'm')
 					unitmask |= AMD_PMC_UNITMASK_M;
 				else if (c == 'o')
 					unitmask |= AMD_PMC_UNITMASK_O;
 				else if (c == 'e')
 					unitmask |= AMD_PMC_UNITMASK_E;
 				else if (c == 's')
 					unitmask |= AMD_PMC_UNITMASK_S;
 				else if (c == 'i')
 					unitmask |= AMD_PMC_UNITMASK_I;
 				else if (c == '+')
 					continue;
 				else
 					return (-1);
 
 			if (unitmask == 0)
 				return (-1);
 
 		} else if (KWMATCH(p, K7_KW_USR)) {
 			pmc_config->pm_caps |= PMC_CAP_USER;
 		} else
 			return (-1);
 	}
 
 	if (has_unitmask) {
 		pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		pmc_config->pm_md.pm_amd.pm_amd_config |=
 		    AMD_PMC_TO_UNITMASK(unitmask);
 	}
 
 	return (0);
 
 }
 
 #endif
 
 #if defined(__amd64__) || defined(__i386__)
 
 /*
  * Intel Core (Family 6, Model E) PMCs.
  */
 
 static struct pmc_event_alias core_aliases[] = {
 	EV_ALIAS("branches",		"iap-br-instr-ret"),
 	EV_ALIAS("branch-mispredicts",	"iap-br-mispred-ret"),
 	EV_ALIAS("cycles",		"tsc-tsc"),
 	EV_ALIAS("ic-misses",		"iap-icache-misses"),
 	EV_ALIAS("instructions",	"iap-instr-ret"),
 	EV_ALIAS("interrupts",		"iap-core-hw-int-rx"),
 	EV_ALIAS("unhalted-cycles",	"iap-unhalted-core-cycles"),
 	EV_ALIAS(NULL, NULL)
 };
 
 /*
  * Intel Core2 (Family 6, Model F), Core2Extreme (Family 6, Model 17H)
  * and Atom (Family 6, model 1CH) PMCs.
  *
  * We map aliases to events on the fixed-function counters if these
  * are present.  Note that not all CPUs in this family contain fixed-function
  * counters.
  */
 
 static struct pmc_event_alias core2_aliases[] = {
 	EV_ALIAS("branches",		"iap-br-inst-retired.any"),
 	EV_ALIAS("branch-mispredicts",	"iap-br-inst-retired.mispred"),
 	EV_ALIAS("cycles",		"tsc-tsc"),
 	EV_ALIAS("ic-misses",		"iap-l1i-misses"),
 	EV_ALIAS("instructions",	"iaf-instr-retired.any"),
 	EV_ALIAS("interrupts",		"iap-hw-int-rcv"),
 	EV_ALIAS("unhalted-cycles",	"iaf-cpu-clk-unhalted.core"),
 	EV_ALIAS(NULL, NULL)
 };
 
 static struct pmc_event_alias core2_aliases_without_iaf[] = {
 	EV_ALIAS("branches",		"iap-br-inst-retired.any"),
 	EV_ALIAS("branch-mispredicts",	"iap-br-inst-retired.mispred"),
 	EV_ALIAS("cycles",		"tsc-tsc"),
 	EV_ALIAS("ic-misses",		"iap-l1i-misses"),
 	EV_ALIAS("instructions",	"iap-inst-retired.any_p"),
 	EV_ALIAS("interrupts",		"iap-hw-int-rcv"),
 	EV_ALIAS("unhalted-cycles",	"iap-cpu-clk-unhalted.core_p"),
 	EV_ALIAS(NULL, NULL)
 };
 
 #define	atom_aliases			core2_aliases
 #define	atom_aliases_without_iaf	core2_aliases_without_iaf
 #define corei7_aliases			core2_aliases
 #define corei7_aliases_without_iaf	core2_aliases_without_iaf
+#define westmere_aliases		core2_aliases
+#define westmere_aliases_without_iaf	core2_aliases_without_iaf
 
 #define	IAF_KW_OS		"os"
 #define	IAF_KW_USR		"usr"
 #define	IAF_KW_ANYTHREAD	"anythread"
 
 /*
  * Parse an event specifier for Intel fixed function counters.
  */
 static int
 iaf_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char *p;
 
 	(void) pe;
 
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
 	pmc_config->pm_md.pm_iaf.pm_iaf_flags = 0;
 
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
 		if (KWMATCH(p, IAF_KW_OS))
 			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 		else if (KWMATCH(p, IAF_KW_USR))
 			pmc_config->pm_caps |= PMC_CAP_USER;
 		else if (KWMATCH(p, IAF_KW_ANYTHREAD))
 			pmc_config->pm_md.pm_iaf.pm_iaf_flags |= IAF_ANY;
 		else
 			return (-1);
 	}
 
 	return (0);
 }
 
 /*
  * Core/Core2 support.
  */
 
 #define	IAP_KW_AGENT		"agent"
 #define	IAP_KW_ANYTHREAD	"anythread"
 #define	IAP_KW_CACHESTATE	"cachestate"
 #define	IAP_KW_CMASK		"cmask"
 #define	IAP_KW_CORE		"core"
 #define	IAP_KW_EDGE		"edge"
 #define	IAP_KW_INV		"inv"
 #define	IAP_KW_OS		"os"
 #define	IAP_KW_PREFETCH		"prefetch"
 #define	IAP_KW_SNOOPRESPONSE	"snoopresponse"
 #define	IAP_KW_SNOOPTYPE	"snooptype"
 #define	IAP_KW_TRANSITION	"trans"
 #define	IAP_KW_USR		"usr"
+#define	IAP_KW_RSP		"rsp"
 
 static struct pmc_masks iap_core_mask[] = {
 	PMCMASK(all,	(0x3 << 14)),
 	PMCMASK(this,	(0x1 << 14)),
 	NULLMASK
 };
 
 static struct pmc_masks iap_agent_mask[] = {
 	PMCMASK(this,	0),
 	PMCMASK(any,	(0x1 << 13)),
 	NULLMASK
 };
 
 static struct pmc_masks iap_prefetch_mask[] = {
 	PMCMASK(both,		(0x3 << 12)),
 	PMCMASK(only,		(0x1 << 12)),
 	PMCMASK(exclude,	0),
 	NULLMASK
 };
 
 static struct pmc_masks iap_cachestate_mask[] = {
 	PMCMASK(i,		(1 <<  8)),
 	PMCMASK(s,		(1 <<  9)),
 	PMCMASK(e,		(1 << 10)),
 	PMCMASK(m,		(1 << 11)),
 	NULLMASK
 };
 
 static struct pmc_masks iap_snoopresponse_mask[] = {
 	PMCMASK(clean,		(1 << 8)),
 	PMCMASK(hit,		(1 << 9)),
 	PMCMASK(hitm,		(1 << 11)),
 	NULLMASK
 };
 
 static struct pmc_masks iap_snooptype_mask[] = {
 	PMCMASK(cmp2s,		(1 << 8)),
 	PMCMASK(cmp2i,		(1 << 9)),
 	NULLMASK
 };
 
 static struct pmc_masks iap_transition_mask[] = {
 	PMCMASK(any,		0x00),
 	PMCMASK(frequency,	0x10),
 	NULLMASK
 };
 
+static struct pmc_masks iap_rsp_mask[] = {
+	PMCMASK(DMND_DATA_RD,		(1 <<  0)),
+	PMCMASK(DMND_RFO,		(1 <<  1)),
+	PMCMASK(DMND_IFETCH,		(1 <<  2)),
+	PMCMASK(WB,			(1 <<  3)),
+	PMCMASK(PF_DATA_RD,		(1 <<  4)),
+	PMCMASK(PF_RFO,			(1 <<  5)),
+	PMCMASK(PF_IFETCH,		(1 <<  6)),
+	PMCMASK(OTHER,			(1 <<  7)),
+	PMCMASK(UNCORE_HIT,		(1 <<  8)),
+	PMCMASK(OTHER_CORE_HIT_SNP,	(1 <<  9)),
+	PMCMASK(OTHER_CORE_HITM,	(1 << 10)),
+	PMCMASK(REMOTE_CACHE_FWD,	(1 << 12)),
+	PMCMASK(REMOTE_DRAM,		(1 << 13)),
+	PMCMASK(LOCAL_DRAM,		(1 << 14)),
+	PMCMASK(NON_DRAM,		(1 << 15)),
+	NULLMASK
+};
+
 static int
 iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char *e, *p, *q;
-	uint32_t cachestate, evmask;
+	uint32_t cachestate, evmask, rsp;
 	int count, n;
 
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
 	    PMC_CAP_QUALIFIER);
 	pmc_config->pm_md.pm_iap.pm_iap_config = 0;
 
-	cachestate = evmask = 0;
+	cachestate = evmask = rsp = 0;
 
 	/* Parse additional modifiers if present */
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
 
 		n = 0;
 		if (KWPREFIXMATCH(p, IAP_KW_CMASK "=")) {
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
 			pmc_config->pm_md.pm_iap.pm_iap_config |=
 			    IAP_CMASK(count);
 		} else if (KWMATCH(p, IAP_KW_EDGE)) {
 			pmc_config->pm_caps |= PMC_CAP_EDGE;
 		} else if (KWMATCH(p, IAP_KW_INV)) {
 			pmc_config->pm_caps |= PMC_CAP_INVERT;
 		} else if (KWMATCH(p, IAP_KW_OS)) {
 			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 		} else if (KWMATCH(p, IAP_KW_USR)) {
 			pmc_config->pm_caps |= PMC_CAP_USER;
 		} else if (KWMATCH(p, IAP_KW_ANYTHREAD)) {
 			pmc_config->pm_md.pm_iap.pm_iap_config |= IAP_ANY;
 		} else if (KWPREFIXMATCH(p, IAP_KW_CORE "=")) {
 			n = pmc_parse_mask(iap_core_mask, p, &evmask);
 			if (n != 1)
 				return (-1);
 		} else if (KWPREFIXMATCH(p, IAP_KW_AGENT "=")) {
 			n = pmc_parse_mask(iap_agent_mask, p, &evmask);
 			if (n != 1)
 				return (-1);
 		} else if (KWPREFIXMATCH(p, IAP_KW_PREFETCH "=")) {
 			n = pmc_parse_mask(iap_prefetch_mask, p, &evmask);
 			if (n != 1)
 				return (-1);
 		} else if (KWPREFIXMATCH(p, IAP_KW_CACHESTATE "=")) {
 			n = pmc_parse_mask(iap_cachestate_mask, p, &cachestate);
 		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_CORE &&
 		    KWPREFIXMATCH(p, IAP_KW_TRANSITION "=")) {
 			n = pmc_parse_mask(iap_transition_mask, p, &evmask);
 			if (n != 1)
 				return (-1);
 		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM ||
 		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2 ||
-		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME ||
-		    cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7) {
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME) {
 			if (KWPREFIXMATCH(p, IAP_KW_SNOOPRESPONSE "=")) {
 				n = pmc_parse_mask(iap_snoopresponse_mask, p,
 				    &evmask);
 			} else if (KWPREFIXMATCH(p, IAP_KW_SNOOPTYPE "=")) {
 				n = pmc_parse_mask(iap_snooptype_mask, p,
 				    &evmask);
 			} else
 				return (-1);
+		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7 ||
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_WESTMERE) {
+			if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
+				n = pmc_parse_mask(iap_rsp_mask, p, &rsp);
+			} else
+				return (-1);
 		} else
 			return (-1);
 
 		if (n < 0)	/* Parsing failed. */
 			return (-1);
 	}
 
 	pmc_config->pm_md.pm_iap.pm_iap_config |= evmask;
 
 	/*
 	 * If the event requires a 'cachestate' qualifier but was not
 	 * specified by the user, use a sensible default.
 	 */
 	switch (pe) {
 	case PMC_EV_IAP_EVENT_28H: /* Core, Core2, Atom */
 	case PMC_EV_IAP_EVENT_29H: /* Core, Core2, Atom */
 	case PMC_EV_IAP_EVENT_2AH: /* Core, Core2, Atom */
 	case PMC_EV_IAP_EVENT_2BH: /* Atom, Core2 */
 	case PMC_EV_IAP_EVENT_2EH: /* Core, Core2, Atom */
 	case PMC_EV_IAP_EVENT_30H: /* Core, Core2, Atom */
 	case PMC_EV_IAP_EVENT_32H: /* Core */
 	case PMC_EV_IAP_EVENT_40H: /* Core */
 	case PMC_EV_IAP_EVENT_41H: /* Core */
 	case PMC_EV_IAP_EVENT_42H: /* Core, Core2, Atom */
 	case PMC_EV_IAP_EVENT_77H: /* Core */
 		if (cachestate == 0)
 			cachestate = (0xF << 8);
 	default:
 		break;
 	}
 
 	pmc_config->pm_md.pm_iap.pm_iap_config |= cachestate;
+	pmc_config->pm_md.pm_iap.pm_iap_rsp = rsp;
 
 	return (0);
 }
 
 /*
+ * Intel Uncore.
+ */
+
+static int
+ucf_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	(void) pe;
+	(void) ctrspec;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+	pmc_config->pm_md.pm_ucf.pm_ucf_flags = 0;
+
+	return (0);
+}
+
+#define	UCP_KW_CMASK		"cmask"
+#define	UCP_KW_EDGE		"edge"
+#define	UCP_KW_INV		"inv"
+
+static int
+ucp_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *e, *p, *q;
+	int count, n;
+
+	(void) pe;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
+	    PMC_CAP_QUALIFIER);
+	pmc_config->pm_md.pm_ucp.pm_ucp_config = 0;
+
+	/* Parse additional modifiers if present */
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+
+		n = 0;
+		if (KWPREFIXMATCH(p, UCP_KW_CMASK "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_ucp.pm_ucp_config |=
+			    UCP_CMASK(count);
+		} else if (KWMATCH(p, UCP_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		} else if (KWMATCH(p, UCP_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		} else
+			return (-1);
+
+		if (n < 0)	/* Parsing failed. */
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
  * AMD K8 PMCs.
  *
  * These are very similar to AMD K7 PMCs, but support more kinds of
  * events.
  */
 
 static struct pmc_event_alias k8_aliases[] = {
 	EV_ALIAS("branches",		"k8-fr-retired-taken-branches"),
 	EV_ALIAS("branch-mispredicts",
 	    "k8-fr-retired-taken-branches-mispredicted"),
 	EV_ALIAS("cycles",		"tsc"),
 	EV_ALIAS("dc-misses",		"k8-dc-miss"),
 	EV_ALIAS("ic-misses",		"k8-ic-miss"),
 	EV_ALIAS("instructions",	"k8-fr-retired-x86-instructions"),
 	EV_ALIAS("interrupts",		"k8-fr-taken-hardware-interrupts"),
 	EV_ALIAS("unhalted-cycles",	"k8-bu-cpu-clk-unhalted"),
 	EV_ALIAS(NULL, NULL)
 };
 
 #define	__K8MASK(N,V) PMCMASK(N,(1 << (V)))
 
 /*
  * Parsing tables
  */
 
 /* fp dispatched fpu ops */
 static const struct pmc_masks k8_mask_fdfo[] = {
 	__K8MASK(add-pipe-excluding-junk-ops,	0),
 	__K8MASK(multiply-pipe-excluding-junk-ops,	1),
 	__K8MASK(store-pipe-excluding-junk-ops,	2),
 	__K8MASK(add-pipe-junk-ops,		3),
 	__K8MASK(multiply-pipe-junk-ops,	4),
 	__K8MASK(store-pipe-junk-ops,		5),
 	NULLMASK
 };
 
 /* ls segment register loads */
 static const struct pmc_masks k8_mask_lsrl[] = {
 	__K8MASK(es,	0),
 	__K8MASK(cs,	1),
 	__K8MASK(ss,	2),
 	__K8MASK(ds,	3),
 	__K8MASK(fs,	4),
 	__K8MASK(gs,	5),
 	__K8MASK(hs,	6),
 	NULLMASK
 };
 
 /* ls locked operation */
 static const struct pmc_masks k8_mask_llo[] = {
 	__K8MASK(locked-instructions,	0),
 	__K8MASK(cycles-in-request,	1),
 	__K8MASK(cycles-to-complete,	2),
 	NULLMASK
 };
 
 /* dc refill from {l2,system} and dc copyback */
 static const struct pmc_masks k8_mask_dc[] = {
 	__K8MASK(invalid,	0),
 	__K8MASK(shared,	1),
 	__K8MASK(exclusive,	2),
 	__K8MASK(owner,		3),
 	__K8MASK(modified,	4),
 	NULLMASK
 };
 
 /* dc one bit ecc error */
 static const struct pmc_masks k8_mask_dobee[] = {
 	__K8MASK(scrubber,	0),
 	__K8MASK(piggyback,	1),
 	NULLMASK
 };
 
 /* dc dispatched prefetch instructions */
 static const struct pmc_masks k8_mask_ddpi[] = {
 	__K8MASK(load,	0),
 	__K8MASK(store,	1),
 	__K8MASK(nta,	2),
 	NULLMASK
 };
 
 /* dc dcache accesses by locks */
 static const struct pmc_masks k8_mask_dabl[] = {
 	__K8MASK(accesses,	0),
 	__K8MASK(misses,	1),
 	NULLMASK
 };
 
 /* bu internal l2 request */
 static const struct pmc_masks k8_mask_bilr[] = {
 	__K8MASK(ic-fill,	0),
 	__K8MASK(dc-fill,	1),
 	__K8MASK(tlb-reload,	2),
 	__K8MASK(tag-snoop,	3),
 	__K8MASK(cancelled,	4),
 	NULLMASK
 };
 
 /* bu fill request l2 miss */
 static const struct pmc_masks k8_mask_bfrlm[] = {
 	__K8MASK(ic-fill,	0),
 	__K8MASK(dc-fill,	1),
 	__K8MASK(tlb-reload,	2),
 	NULLMASK
 };
 
 /* bu fill into l2 */
 static const struct pmc_masks k8_mask_bfil[] = {
 	__K8MASK(dirty-l2-victim,	0),
 	__K8MASK(victim-from-l2,	1),
 	NULLMASK
 };
 
 /* fr retired fpu instructions */
 static const struct pmc_masks k8_mask_frfi[] = {
 	__K8MASK(x87,			0),
 	__K8MASK(mmx-3dnow,		1),
 	__K8MASK(packed-sse-sse2,	2),
 	__K8MASK(scalar-sse-sse2,	3),
 	NULLMASK
 };
 
 /* fr retired fastpath double op instructions */
 static const struct pmc_masks k8_mask_frfdoi[] = {
 	__K8MASK(low-op-pos-0,		0),
 	__K8MASK(low-op-pos-1,		1),
 	__K8MASK(low-op-pos-2,		2),
 	NULLMASK
 };
 
 /* fr fpu exceptions */
 static const struct pmc_masks k8_mask_ffe[] = {
 	__K8MASK(x87-reclass-microfaults,	0),
 	__K8MASK(sse-retype-microfaults,	1),
 	__K8MASK(sse-reclass-microfaults,	2),
 	__K8MASK(sse-and-x87-microtraps,	3),
 	NULLMASK
 };
 
 /* nb memory controller page access event */
 static const struct pmc_masks k8_mask_nmcpae[] = {
 	__K8MASK(page-hit,	0),
 	__K8MASK(page-miss,	1),
 	__K8MASK(page-conflict,	2),
 	NULLMASK
 };
 
 /* nb memory controller turnaround */
 static const struct pmc_masks k8_mask_nmct[] = {
 	__K8MASK(dimm-turnaround,		0),
 	__K8MASK(read-to-write-turnaround,	1),
 	__K8MASK(write-to-read-turnaround,	2),
 	NULLMASK
 };
 
 /* nb memory controller bypass saturation */
 static const struct pmc_masks k8_mask_nmcbs[] = {
 	__K8MASK(memory-controller-hi-pri-bypass,	0),
 	__K8MASK(memory-controller-lo-pri-bypass,	1),
 	__K8MASK(dram-controller-interface-bypass,	2),
 	__K8MASK(dram-controller-queue-bypass,		3),
 	NULLMASK
 };
 
 /* nb sized commands */
 static const struct pmc_masks k8_mask_nsc[] = {
 	__K8MASK(nonpostwrszbyte,	0),
 	__K8MASK(nonpostwrszdword,	1),
 	__K8MASK(postwrszbyte,		2),
 	__K8MASK(postwrszdword,		3),
 	__K8MASK(rdszbyte,		4),
 	__K8MASK(rdszdword,		5),
 	__K8MASK(rdmodwr,		6),
 	NULLMASK
 };
 
 /* nb probe result */
 static const struct pmc_masks k8_mask_npr[] = {
 	__K8MASK(probe-miss,		0),
 	__K8MASK(probe-hit,		1),
 	__K8MASK(probe-hit-dirty-no-memory-cancel, 2),
 	__K8MASK(probe-hit-dirty-with-memory-cancel, 3),
 	NULLMASK
 };
 
 /* nb hypertransport bus bandwidth */
 static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */
 	__K8MASK(command,	0),
 	__K8MASK(data,	1),
 	__K8MASK(buffer-release, 2),
 	__K8MASK(nop,	3),
 	NULLMASK
 };
 
 #undef	__K8MASK
 
 #define	K8_KW_COUNT	"count"
 #define	K8_KW_EDGE	"edge"
 #define	K8_KW_INV	"inv"
 #define	K8_KW_MASK	"mask"
 #define	K8_KW_OS	"os"
 #define	K8_KW_USR	"usr"
 
 static int
 k8_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char		*e, *p, *q;
 	int		n;
 	uint32_t	count, evmask;
 	const struct pmc_masks	*pm, *pmask;
 
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
 	pmc_config->pm_md.pm_amd.pm_amd_config = 0;
 
 	pmask = NULL;
 	evmask = 0;
 
 #define	__K8SETMASK(M) pmask = k8_mask_##M
 
 	/* setup parsing tables */
 	switch (pe) {
 	case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
 		__K8SETMASK(fdfo);
 		break;
 	case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD:
 		__K8SETMASK(lsrl);
 		break;
 	case PMC_EV_K8_LS_LOCKED_OPERATION:
 		__K8SETMASK(llo);
 		break;
 	case PMC_EV_K8_DC_REFILL_FROM_L2:
 	case PMC_EV_K8_DC_REFILL_FROM_SYSTEM:
 	case PMC_EV_K8_DC_COPYBACK:
 		__K8SETMASK(dc);
 		break;
 	case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR:
 		__K8SETMASK(dobee);
 		break;
 	case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS:
 		__K8SETMASK(ddpi);
 		break;
 	case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
 		__K8SETMASK(dabl);
 		break;
 	case PMC_EV_K8_BU_INTERNAL_L2_REQUEST:
 		__K8SETMASK(bilr);
 		break;
 	case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS:
 		__K8SETMASK(bfrlm);
 		break;
 	case PMC_EV_K8_BU_FILL_INTO_L2:
 		__K8SETMASK(bfil);
 		break;
 	case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
 		__K8SETMASK(frfi);
 		break;
 	case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
 		__K8SETMASK(frfdoi);
 		break;
 	case PMC_EV_K8_FR_FPU_EXCEPTIONS:
 		__K8SETMASK(ffe);
 		break;
 	case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT:
 		__K8SETMASK(nmcpae);
 		break;
 	case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND:
 		__K8SETMASK(nmct);
 		break;
 	case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION:
 		__K8SETMASK(nmcbs);
 		break;
 	case PMC_EV_K8_NB_SIZED_COMMANDS:
 		__K8SETMASK(nsc);
 		break;
 	case PMC_EV_K8_NB_PROBE_RESULT:
 		__K8SETMASK(npr);
 		break;
 	case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH:
 	case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH:
 	case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH:
 		__K8SETMASK(nhbb);
 		break;
 
 	default:
 		break;		/* no options defined */
 	}
 
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
 		if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) {
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 
 			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
 			pmc_config->pm_md.pm_amd.pm_amd_config |=
 			    AMD_PMC_TO_COUNTER(count);
 
 		} else if (KWMATCH(p, K8_KW_EDGE)) {
 			pmc_config->pm_caps |= PMC_CAP_EDGE;
 		} else if (KWMATCH(p, K8_KW_INV)) {
 			pmc_config->pm_caps |= PMC_CAP_INVERT;
 		} else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) {
 			if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0)
 				return (-1);
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		} else if (KWMATCH(p, K8_KW_OS)) {
 			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 		} else if (KWMATCH(p, K8_KW_USR)) {
 			pmc_config->pm_caps |= PMC_CAP_USER;
 		} else
 			return (-1);
 	}
 
 	/* other post processing */
 	switch (pe) {
 	case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
 	case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED:
 	case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS:
 	case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
 	case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
 	case PMC_EV_K8_FR_FPU_EXCEPTIONS:
 		/* XXX only available in rev B and later */
 		break;
 	case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
 		/* XXX only available in rev C and later */
 		break;
 	case PMC_EV_K8_LS_LOCKED_OPERATION:
 		/* XXX CPU Rev A,B evmask is to be zero */
 		if (evmask & (evmask - 1)) /* > 1 bit set */
 			return (-1);
 		if (evmask == 0) {
 			evmask = 0x01; /* Rev C and later: #instrs */
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		}
 		break;
 	default:
 		if (evmask == 0 && pmask != NULL) {
 			for (pm = pmask; pm->pm_name; pm++)
 				evmask |= pm->pm_value;
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		}
 	}
 
 	if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
 		pmc_config->pm_md.pm_amd.pm_amd_config =
 		    AMD_PMC_TO_UNITMASK(evmask);
 
 	return (0);
 }
 
 #endif
 
 #if defined(__amd64__) || defined(__i386__)
 
 /*
  * Intel P4 PMCs
  */
 
 static struct pmc_event_alias p4_aliases[] = {
 	EV_ALIAS("branches",		"p4-branch-retired,mask=mmtp+mmtm"),
 	EV_ALIAS("branch-mispredicts",	"p4-mispred-branch-retired"),
 	EV_ALIAS("cycles",		"tsc"),
 	EV_ALIAS("instructions",
 	    "p4-instr-retired,mask=nbogusntag+nbogustag"),
 	EV_ALIAS("unhalted-cycles",	"p4-global-power-events"),
 	EV_ALIAS(NULL, NULL)
 };
 
 #define	P4_KW_ACTIVE	"active"
 #define	P4_KW_ACTIVE_ANY "any"
 #define	P4_KW_ACTIVE_BOTH "both"
 #define	P4_KW_ACTIVE_NONE "none"
 #define	P4_KW_ACTIVE_SINGLE "single"
 #define	P4_KW_BUSREQTYPE "busreqtype"
 #define	P4_KW_CASCADE	"cascade"
 #define	P4_KW_EDGE	"edge"
 #define	P4_KW_INV	"complement"
 #define	P4_KW_OS	"os"
 #define	P4_KW_MASK	"mask"
 #define	P4_KW_PRECISE	"precise"
 #define	P4_KW_TAG	"tag"
 #define	P4_KW_THRESHOLD	"threshold"
 #define	P4_KW_USR	"usr"
 
 #define	__P4MASK(N,V) PMCMASK(N, (1 << (V)))
 
 static const struct pmc_masks p4_mask_tcdm[] = { /* tc deliver mode */
 	__P4MASK(dd, 0),
 	__P4MASK(db, 1),
 	__P4MASK(di, 2),
 	__P4MASK(bd, 3),
 	__P4MASK(bb, 4),
 	__P4MASK(bi, 5),
 	__P4MASK(id, 6),
 	__P4MASK(ib, 7),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_bfr[] = { /* bpu fetch request */
 	__P4MASK(tcmiss, 0),
 	NULLMASK,
 };
 
 static const struct pmc_masks p4_mask_ir[] = { /* itlb reference */
 	__P4MASK(hit, 0),
 	__P4MASK(miss, 1),
 	__P4MASK(hit-uc, 2),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_memcan[] = { /* memory cancel */
 	__P4MASK(st-rb-full, 2),
 	__P4MASK(64k-conf, 3),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_memcomp[] = { /* memory complete */
 	__P4MASK(lsc, 0),
 	__P4MASK(ssc, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_lpr[] = { /* load port replay */
 	__P4MASK(split-ld, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_spr[] = { /* store port replay */
 	__P4MASK(split-st, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_mlr[] = { /* mob load replay */
 	__P4MASK(no-sta, 1),
 	__P4MASK(no-std, 3),
 	__P4MASK(partial-data, 4),
 	__P4MASK(unalgn-addr, 5),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_pwt[] = { /* page walk type */
 	__P4MASK(dtmiss, 0),
 	__P4MASK(itmiss, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_bcr[] = { /* bsq cache reference */
 	__P4MASK(rd-2ndl-hits, 0),
 	__P4MASK(rd-2ndl-hite, 1),
 	__P4MASK(rd-2ndl-hitm, 2),
 	__P4MASK(rd-3rdl-hits, 3),
 	__P4MASK(rd-3rdl-hite, 4),
 	__P4MASK(rd-3rdl-hitm, 5),
 	__P4MASK(rd-2ndl-miss, 8),
 	__P4MASK(rd-3rdl-miss, 9),
 	__P4MASK(wr-2ndl-miss, 10),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_ia[] = { /* ioq allocation */
 	__P4MASK(all-read, 5),
 	__P4MASK(all-write, 6),
 	__P4MASK(mem-uc, 7),
 	__P4MASK(mem-wc, 8),
 	__P4MASK(mem-wt, 9),
 	__P4MASK(mem-wp, 10),
 	__P4MASK(mem-wb, 11),
 	__P4MASK(own, 13),
 	__P4MASK(other, 14),
 	__P4MASK(prefetch, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_iae[] = { /* ioq active entries */
 	__P4MASK(all-read, 5),
 	__P4MASK(all-write, 6),
 	__P4MASK(mem-uc, 7),
 	__P4MASK(mem-wc, 8),
 	__P4MASK(mem-wt, 9),
 	__P4MASK(mem-wp, 10),
 	__P4MASK(mem-wb, 11),
 	__P4MASK(own, 13),
 	__P4MASK(other, 14),
 	__P4MASK(prefetch, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_fda[] = { /* fsb data activity */
 	__P4MASK(drdy-drv, 0),
 	__P4MASK(drdy-own, 1),
 	__P4MASK(drdy-other, 2),
 	__P4MASK(dbsy-drv, 3),
 	__P4MASK(dbsy-own, 4),
 	__P4MASK(dbsy-other, 5),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_ba[] = { /* bsq allocation */
 	__P4MASK(req-type0, 0),
 	__P4MASK(req-type1, 1),
 	__P4MASK(req-len0, 2),
 	__P4MASK(req-len1, 3),
 	__P4MASK(req-io-type, 5),
 	__P4MASK(req-lock-type, 6),
 	__P4MASK(req-cache-type, 7),
 	__P4MASK(req-split-type, 8),
 	__P4MASK(req-dem-type, 9),
 	__P4MASK(req-ord-type, 10),
 	__P4MASK(mem-type0, 11),
 	__P4MASK(mem-type1, 12),
 	__P4MASK(mem-type2, 13),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_sia[] = { /* sse input assist */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_psu[] = { /* packed sp uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_pdu[] = { /* packed dp uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_ssu[] = { /* scalar sp uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_sdu[] = { /* scalar dp uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_64bmu[] = { /* 64 bit mmx uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_128bmu[] = { /* 128 bit mmx uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_xfu[] = { /* X87 fp uop */
 	__P4MASK(all, 15),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_xsmu[] = { /* x87 simd moves uop */
 	__P4MASK(allp0, 3),
 	__P4MASK(allp2, 4),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_gpe[] = { /* global power events */
 	__P4MASK(running, 0),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_tmx[] = { /* TC ms xfer */
 	__P4MASK(cisc, 0),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_uqw[] = { /* uop queue writes */
 	__P4MASK(from-tc-build, 0),
 	__P4MASK(from-tc-deliver, 1),
 	__P4MASK(from-rom, 2),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_rmbt[] = {
 	/* retired mispred branch type */
 	__P4MASK(conditional, 1),
 	__P4MASK(call, 2),
 	__P4MASK(return, 3),
 	__P4MASK(indirect, 4),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_rbt[] = { /* retired branch type */
 	__P4MASK(conditional, 1),
 	__P4MASK(call, 2),
 	__P4MASK(retired, 3),
 	__P4MASK(indirect, 4),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_rs[] = { /* resource stall */
 	__P4MASK(sbfull, 5),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_wb[] = { /* WC buffer */
 	__P4MASK(wcb-evicts, 0),
 	__P4MASK(wcb-full-evict, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_fee[] = { /* front end event */
 	__P4MASK(nbogus, 0),
 	__P4MASK(bogus, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_ee[] = { /* execution event */
 	__P4MASK(nbogus0, 0),
 	__P4MASK(nbogus1, 1),
 	__P4MASK(nbogus2, 2),
 	__P4MASK(nbogus3, 3),
 	__P4MASK(bogus0, 4),
 	__P4MASK(bogus1, 5),
 	__P4MASK(bogus2, 6),
 	__P4MASK(bogus3, 7),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_re[] = { /* replay event */
 	__P4MASK(nbogus, 0),
 	__P4MASK(bogus, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_insret[] = { /* instr retired */
 	__P4MASK(nbogusntag, 0),
 	__P4MASK(nbogustag, 1),
 	__P4MASK(bogusntag, 2),
 	__P4MASK(bogustag, 3),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_ur[] = { /* uops retired */
 	__P4MASK(nbogus, 0),
 	__P4MASK(bogus, 1),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_ut[] = { /* uop type */
 	__P4MASK(tagloads, 1),
 	__P4MASK(tagstores, 2),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_br[] = { /* branch retired */
 	__P4MASK(mmnp, 0),
 	__P4MASK(mmnm, 1),
 	__P4MASK(mmtp, 2),
 	__P4MASK(mmtm, 3),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_mbr[] = { /* mispred branch retired */
 	__P4MASK(nbogus, 0),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_xa[] = { /* x87 assist */
 	__P4MASK(fpsu, 0),
 	__P4MASK(fpso, 1),
 	__P4MASK(poao, 2),
 	__P4MASK(poau, 3),
 	__P4MASK(prea, 4),
 	NULLMASK
 };
 
 static const struct pmc_masks p4_mask_machclr[] = { /* machine clear */
 	__P4MASK(clear, 0),
 	__P4MASK(moclear, 2),
 	__P4MASK(smclear, 3),
 	NULLMASK
 };
 
 /* P4 event parser */
 static int
 p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 
 	char	*e, *p, *q;
 	int	count, has_tag, has_busreqtype, n;
 	uint32_t evmask, cccractivemask;
 	const struct pmc_masks *pm, *pmask;
 
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
 	pmc_config->pm_md.pm_p4.pm_p4_cccrconfig =
 	    pmc_config->pm_md.pm_p4.pm_p4_escrconfig = 0;
 
 	pmask   = NULL;
 	evmask  = 0;
 	cccractivemask = 0x3;
 	has_tag = has_busreqtype = 0;
 
 #define	__P4SETMASK(M) do {				\
 	pmask = p4_mask_##M;				\
 } while (0)
 
 	switch (pe) {
 	case PMC_EV_P4_TC_DELIVER_MODE:
 		__P4SETMASK(tcdm);
 		break;
 	case PMC_EV_P4_BPU_FETCH_REQUEST:
 		__P4SETMASK(bfr);
 		break;
 	case PMC_EV_P4_ITLB_REFERENCE:
 		__P4SETMASK(ir);
 		break;
 	case PMC_EV_P4_MEMORY_CANCEL:
 		__P4SETMASK(memcan);
 		break;
 	case PMC_EV_P4_MEMORY_COMPLETE:
 		__P4SETMASK(memcomp);
 		break;
 	case PMC_EV_P4_LOAD_PORT_REPLAY:
 		__P4SETMASK(lpr);
 		break;
 	case PMC_EV_P4_STORE_PORT_REPLAY:
 		__P4SETMASK(spr);
 		break;
 	case PMC_EV_P4_MOB_LOAD_REPLAY:
 		__P4SETMASK(mlr);
 		break;
 	case PMC_EV_P4_PAGE_WALK_TYPE:
 		__P4SETMASK(pwt);
 		break;
 	case PMC_EV_P4_BSQ_CACHE_REFERENCE:
 		__P4SETMASK(bcr);
 		break;
 	case PMC_EV_P4_IOQ_ALLOCATION:
 		__P4SETMASK(ia);
 		has_busreqtype = 1;
 		break;
 	case PMC_EV_P4_IOQ_ACTIVE_ENTRIES:
 		__P4SETMASK(iae);
 		has_busreqtype = 1;
 		break;
 	case PMC_EV_P4_FSB_DATA_ACTIVITY:
 		__P4SETMASK(fda);
 		break;
 	case PMC_EV_P4_BSQ_ALLOCATION:
 		__P4SETMASK(ba);
 		break;
 	case PMC_EV_P4_SSE_INPUT_ASSIST:
 		__P4SETMASK(sia);
 		break;
 	case PMC_EV_P4_PACKED_SP_UOP:
 		__P4SETMASK(psu);
 		break;
 	case PMC_EV_P4_PACKED_DP_UOP:
 		__P4SETMASK(pdu);
 		break;
 	case PMC_EV_P4_SCALAR_SP_UOP:
 		__P4SETMASK(ssu);
 		break;
 	case PMC_EV_P4_SCALAR_DP_UOP:
 		__P4SETMASK(sdu);
 		break;
 	case PMC_EV_P4_64BIT_MMX_UOP:
 		__P4SETMASK(64bmu);
 		break;
 	case PMC_EV_P4_128BIT_MMX_UOP:
 		__P4SETMASK(128bmu);
 		break;
 	case PMC_EV_P4_X87_FP_UOP:
 		__P4SETMASK(xfu);
 		break;
 	case PMC_EV_P4_X87_SIMD_MOVES_UOP:
 		__P4SETMASK(xsmu);
 		break;
 	case PMC_EV_P4_GLOBAL_POWER_EVENTS:
 		__P4SETMASK(gpe);
 		break;
 	case PMC_EV_P4_TC_MS_XFER:
 		__P4SETMASK(tmx);
 		break;
 	case PMC_EV_P4_UOP_QUEUE_WRITES:
 		__P4SETMASK(uqw);
 		break;
 	case PMC_EV_P4_RETIRED_MISPRED_BRANCH_TYPE:
 		__P4SETMASK(rmbt);
 		break;
 	case PMC_EV_P4_RETIRED_BRANCH_TYPE:
 		__P4SETMASK(rbt);
 		break;
 	case PMC_EV_P4_RESOURCE_STALL:
 		__P4SETMASK(rs);
 		break;
 	case PMC_EV_P4_WC_BUFFER:
 		__P4SETMASK(wb);
 		break;
 	case PMC_EV_P4_BSQ_ACTIVE_ENTRIES:
 	case PMC_EV_P4_B2B_CYCLES:
 	case PMC_EV_P4_BNR:
 	case PMC_EV_P4_SNOOP:
 	case PMC_EV_P4_RESPONSE:
 		break;
 	case PMC_EV_P4_FRONT_END_EVENT:
 		__P4SETMASK(fee);
 		break;
 	case PMC_EV_P4_EXECUTION_EVENT:
 		__P4SETMASK(ee);
 		break;
 	case PMC_EV_P4_REPLAY_EVENT:
 		__P4SETMASK(re);
 		break;
 	case PMC_EV_P4_INSTR_RETIRED:
 		__P4SETMASK(insret);
 		break;
 	case PMC_EV_P4_UOPS_RETIRED:
 		__P4SETMASK(ur);
 		break;
 	case PMC_EV_P4_UOP_TYPE:
 		__P4SETMASK(ut);
 		break;
 	case PMC_EV_P4_BRANCH_RETIRED:
 		__P4SETMASK(br);
 		break;
 	case PMC_EV_P4_MISPRED_BRANCH_RETIRED:
 		__P4SETMASK(mbr);
 		break;
 	case PMC_EV_P4_X87_ASSIST:
 		__P4SETMASK(xa);
 		break;
 	case PMC_EV_P4_MACHINE_CLEAR:
 		__P4SETMASK(machclr);
 		break;
 	default:
 		return (-1);
 	}
 
 	/* process additional flags */
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
 		if (KWPREFIXMATCH(p, P4_KW_ACTIVE)) {
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			if (strcasecmp(q, P4_KW_ACTIVE_NONE) == 0)
 				cccractivemask = 0x0;
 			else if (strcasecmp(q, P4_KW_ACTIVE_SINGLE) == 0)
 				cccractivemask = 0x1;
 			else if (strcasecmp(q, P4_KW_ACTIVE_BOTH) == 0)
 				cccractivemask = 0x2;
 			else if (strcasecmp(q, P4_KW_ACTIVE_ANY) == 0)
 				cccractivemask = 0x3;
 			else
 				return (-1);
 
 		} else if (KWPREFIXMATCH(p, P4_KW_BUSREQTYPE)) {
 			if (has_busreqtype == 0)
 				return (-1);
 
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 			evmask = (evmask & ~0x1F) | (count & 0x1F);
 		} else if (KWMATCH(p, P4_KW_CASCADE))
 			pmc_config->pm_caps |= PMC_CAP_CASCADE;
 		else if (KWMATCH(p, P4_KW_EDGE))
 			pmc_config->pm_caps |= PMC_CAP_EDGE;
 		else if (KWMATCH(p, P4_KW_INV))
 			pmc_config->pm_caps |= PMC_CAP_INVERT;
 		else if (KWPREFIXMATCH(p, P4_KW_MASK "=")) {
 			if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0)
 				return (-1);
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		} else if (KWMATCH(p, P4_KW_OS))
 			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 		else if (KWMATCH(p, P4_KW_PRECISE))
 			pmc_config->pm_caps |= PMC_CAP_PRECISE;
 		else if (KWPREFIXMATCH(p, P4_KW_TAG "=")) {
 			if (has_tag == 0)
 				return (-1);
 
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 
 			pmc_config->pm_caps |= PMC_CAP_TAGGING;
 			pmc_config->pm_md.pm_p4.pm_p4_escrconfig |=
 			    P4_ESCR_TO_TAG_VALUE(count);
 		} else if (KWPREFIXMATCH(p, P4_KW_THRESHOLD "=")) {
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 
 			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
 			pmc_config->pm_md.pm_p4.pm_p4_cccrconfig &=
 			    ~P4_CCCR_THRESHOLD_MASK;
 			pmc_config->pm_md.pm_p4.pm_p4_cccrconfig |=
 			    P4_CCCR_TO_THRESHOLD(count);
 		} else if (KWMATCH(p, P4_KW_USR))
 			pmc_config->pm_caps |= PMC_CAP_USER;
 		else
 			return (-1);
 	}
 
 	/* other post processing */
 	if (pe == PMC_EV_P4_IOQ_ALLOCATION ||
 	    pe == PMC_EV_P4_FSB_DATA_ACTIVITY ||
 	    pe == PMC_EV_P4_BSQ_ALLOCATION)
 		pmc_config->pm_caps |= PMC_CAP_EDGE;
 
 	/* fill in thread activity mask */
 	pmc_config->pm_md.pm_p4.pm_p4_cccrconfig |=
 	    P4_CCCR_TO_ACTIVE_THREAD(cccractivemask);
 
 	if (evmask)
 		pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 
 	switch (pe) {
 	case PMC_EV_P4_FSB_DATA_ACTIVITY:
 		if ((evmask & 0x06) == 0x06 ||
 		    (evmask & 0x18) == 0x18)
 			return (-1); /* can't have own+other bits together */
 		if (evmask == 0) /* default:drdy-{drv,own}+dbsy{drv,own} */
 			evmask = 0x1D;
 		break;
 	case PMC_EV_P4_MACHINE_CLEAR:
 		/* only one bit is allowed to be set */
 		if ((evmask & (evmask - 1)) != 0)
 			return (-1);
 		if (evmask == 0) {
 			evmask = 0x1;	/* 'CLEAR' */
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		}
 		break;
 	default:
 		if (evmask == 0 && pmask) {
 			for (pm = pmask; pm->pm_name; pm++)
 				evmask |= pm->pm_value;
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		}
 	}
 
 	pmc_config->pm_md.pm_p4.pm_p4_escrconfig =
 	    P4_ESCR_TO_EVENT_MASK(evmask);
 
 	return (0);
 }
 
 #endif
 
 #if defined(__i386__)
 
 /*
  * Pentium style PMCs
  */
 
 static struct pmc_event_alias p5_aliases[] = {
 	EV_ALIAS("branches",		"p5-taken-branches"),
 	EV_ALIAS("cycles",		"tsc"),
 	EV_ALIAS("dc-misses",		"p5-data-read-miss-or-write-miss"),
 	EV_ALIAS("ic-misses",		"p5-code-cache-miss"),
 	EV_ALIAS("instructions",	"p5-instructions-executed"),
 	EV_ALIAS("interrupts",		"p5-hardware-interrupts"),
 	EV_ALIAS("unhalted-cycles",
 	    "p5-number-of-cycles-not-in-halt-state"),
 	EV_ALIAS(NULL, NULL)
 };
 
 static int
 p5_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	return (-1 || pe || ctrspec || pmc_config); /* shut up gcc */
 }
 
 /*
  * Pentium Pro style PMCs.  These PMCs are found in Pentium II, Pentium III,
  * and Pentium M CPUs.
  */
 
 static struct pmc_event_alias p6_aliases[] = {
 	EV_ALIAS("branches",		"p6-br-inst-retired"),
 	EV_ALIAS("branch-mispredicts",	"p6-br-miss-pred-retired"),
 	EV_ALIAS("cycles",		"tsc"),
 	EV_ALIAS("dc-misses",		"p6-dcu-lines-in"),
 	EV_ALIAS("ic-misses",		"p6-ifu-fetch-miss"),
 	EV_ALIAS("instructions",	"p6-inst-retired"),
 	EV_ALIAS("interrupts",		"p6-hw-int-rx"),
 	EV_ALIAS("unhalted-cycles",	"p6-cpu-clk-unhalted"),
 	EV_ALIAS(NULL, NULL)
 };
 
 #define	P6_KW_CMASK	"cmask"
 #define	P6_KW_EDGE	"edge"
 #define	P6_KW_INV	"inv"
 #define	P6_KW_OS	"os"
 #define	P6_KW_UMASK	"umask"
 #define	P6_KW_USR	"usr"
 
 static struct pmc_masks p6_mask_mesi[] = {
 	PMCMASK(m,	0x01),
 	PMCMASK(e,	0x02),
 	PMCMASK(s,	0x04),
 	PMCMASK(i,	0x08),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_mesihw[] = {
 	PMCMASK(m,	0x01),
 	PMCMASK(e,	0x02),
 	PMCMASK(s,	0x04),
 	PMCMASK(i,	0x08),
 	PMCMASK(nonhw,	0x00),
 	PMCMASK(hw,	0x10),
 	PMCMASK(both,	0x30),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_hw[] = {
 	PMCMASK(nonhw,	0x00),
 	PMCMASK(hw,	0x10),
 	PMCMASK(both,	0x30),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_any[] = {
 	PMCMASK(self,	0x00),
 	PMCMASK(any,	0x20),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_ekp[] = {
 	PMCMASK(nta,	0x00),
 	PMCMASK(t1,	0x01),
 	PMCMASK(t2,	0x02),
 	PMCMASK(wos,	0x03),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_pps[] = {
 	PMCMASK(packed-and-scalar, 0x00),
 	PMCMASK(scalar,	0x01),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_mite[] = {
 	PMCMASK(packed-multiply,	 0x01),
 	PMCMASK(packed-shift,		0x02),
 	PMCMASK(pack,			0x04),
 	PMCMASK(unpack,			0x08),
 	PMCMASK(packed-logical,		0x10),
 	PMCMASK(packed-arithmetic,	0x20),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_fmt[] = {
 	PMCMASK(mmxtofp,	0x00),
 	PMCMASK(fptommx,	0x01),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_sr[] = {
 	PMCMASK(es,	0x01),
 	PMCMASK(ds,	0x02),
 	PMCMASK(fs,	0x04),
 	PMCMASK(gs,	0x08),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_eet[] = {
 	PMCMASK(all,	0x00),
 	PMCMASK(freq,	0x02),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_efur[] = {
 	PMCMASK(all,	0x00),
 	PMCMASK(loadop,	0x01),
 	PMCMASK(stdsta,	0x02),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_essir[] = {
 	PMCMASK(sse-packed-single,	0x00),
 	PMCMASK(sse-packed-single-scalar-single, 0x01),
 	PMCMASK(sse2-packed-double,	0x02),
 	PMCMASK(sse2-scalar-double,	0x03),
 	NULLMASK
 };
 
 static struct pmc_masks p6_mask_esscir[] = {
 	PMCMASK(sse-packed-single,	0x00),
 	PMCMASK(sse-scalar-single,	0x01),
 	PMCMASK(sse2-packed-double,	0x02),
 	PMCMASK(sse2-scalar-double,	0x03),
 	NULLMASK
 };
 
 /* P6 event parser */
 static int
 p6_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char *e, *p, *q;
 	uint32_t evmask;
 	int count, n;
 	const struct pmc_masks *pm, *pmask;
 
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
 	pmc_config->pm_md.pm_ppro.pm_ppro_config = 0;
 
 	evmask = 0;
 
 #define	P6MASKSET(M)	pmask = p6_mask_ ## M
 
 	switch(pe) {
 	case PMC_EV_P6_L2_IFETCH:	P6MASKSET(mesi); break;
 	case PMC_EV_P6_L2_LD:		P6MASKSET(mesi); break;
 	case PMC_EV_P6_L2_ST:		P6MASKSET(mesi); break;
 	case PMC_EV_P6_L2_RQSTS:	P6MASKSET(mesi); break;
 	case PMC_EV_P6_BUS_DRDY_CLOCKS:
 	case PMC_EV_P6_BUS_LOCK_CLOCKS:
 	case PMC_EV_P6_BUS_TRAN_BRD:
 	case PMC_EV_P6_BUS_TRAN_RFO:
 	case PMC_EV_P6_BUS_TRANS_WB:
 	case PMC_EV_P6_BUS_TRAN_IFETCH:
 	case PMC_EV_P6_BUS_TRAN_INVAL:
 	case PMC_EV_P6_BUS_TRAN_PWR:
 	case PMC_EV_P6_BUS_TRANS_P:
 	case PMC_EV_P6_BUS_TRANS_IO:
 	case PMC_EV_P6_BUS_TRAN_DEF:
 	case PMC_EV_P6_BUS_TRAN_BURST:
 	case PMC_EV_P6_BUS_TRAN_ANY:
 	case PMC_EV_P6_BUS_TRAN_MEM:
 		P6MASKSET(any);	break;
 	case PMC_EV_P6_EMON_KNI_PREF_DISPATCHED:
 	case PMC_EV_P6_EMON_KNI_PREF_MISS:
 		P6MASKSET(ekp); break;
 	case PMC_EV_P6_EMON_KNI_INST_RETIRED:
 	case PMC_EV_P6_EMON_KNI_COMP_INST_RET:
 		P6MASKSET(pps);	break;
 	case PMC_EV_P6_MMX_INSTR_TYPE_EXEC:
 		P6MASKSET(mite); break;
 	case PMC_EV_P6_FP_MMX_TRANS:
 		P6MASKSET(fmt);	break;
 	case PMC_EV_P6_SEG_RENAME_STALLS:
 	case PMC_EV_P6_SEG_REG_RENAMES:
 		P6MASKSET(sr);	break;
 	case PMC_EV_P6_EMON_EST_TRANS:
 		P6MASKSET(eet);	break;
 	case PMC_EV_P6_EMON_FUSED_UOPS_RET:
 		P6MASKSET(efur); break;
 	case PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED:
 		P6MASKSET(essir); break;
 	case PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED:
 		P6MASKSET(esscir); break;
 	default:
 		pmask = NULL;
 		break;
 	}
 
 	/* Pentium M PMCs have a few events with different semantics */
 	if (cpu_info.pm_cputype == PMC_CPU_INTEL_PM) {
 		if (pe == PMC_EV_P6_L2_LD ||
 		    pe == PMC_EV_P6_L2_LINES_IN ||
 		    pe == PMC_EV_P6_L2_LINES_OUT)
 			P6MASKSET(mesihw);
 		else if (pe == PMC_EV_P6_L2_M_LINES_OUTM)
 			P6MASKSET(hw);
 	}
 
 	/* Parse additional modifiers if present */
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
 		if (KWPREFIXMATCH(p, P6_KW_CMASK "=")) {
 			q = strchr(p, '=');
 			if (*++q == '\0') /* skip '=' */
 				return (-1);
 			count = strtol(q, &e, 0);
 			if (e == q || *e != '\0')
 				return (-1);
 			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
 			pmc_config->pm_md.pm_ppro.pm_ppro_config |=
 			    P6_EVSEL_TO_CMASK(count);
 		} else if (KWMATCH(p, P6_KW_EDGE)) {
 			pmc_config->pm_caps |= PMC_CAP_EDGE;
 		} else if (KWMATCH(p, P6_KW_INV)) {
 			pmc_config->pm_caps |= PMC_CAP_INVERT;
 		} else if (KWMATCH(p, P6_KW_OS)) {
 			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 		} else if (KWPREFIXMATCH(p, P6_KW_UMASK "=")) {
 			evmask = 0;
 			if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0)
 				return (-1);
 			if ((pe == PMC_EV_P6_BUS_DRDY_CLOCKS ||
 			     pe == PMC_EV_P6_BUS_LOCK_CLOCKS ||
 			     pe == PMC_EV_P6_BUS_TRAN_BRD ||
 			     pe == PMC_EV_P6_BUS_TRAN_RFO ||
 			     pe == PMC_EV_P6_BUS_TRAN_IFETCH ||
 			     pe == PMC_EV_P6_BUS_TRAN_INVAL ||
 			     pe == PMC_EV_P6_BUS_TRAN_PWR ||
 			     pe == PMC_EV_P6_BUS_TRAN_DEF ||
 			     pe == PMC_EV_P6_BUS_TRAN_BURST ||
 			     pe == PMC_EV_P6_BUS_TRAN_ANY ||
 			     pe == PMC_EV_P6_BUS_TRAN_MEM ||
 			     pe == PMC_EV_P6_BUS_TRANS_IO ||
 			     pe == PMC_EV_P6_BUS_TRANS_P ||
 			     pe == PMC_EV_P6_BUS_TRANS_WB ||
 			     pe == PMC_EV_P6_EMON_EST_TRANS ||
 			     pe == PMC_EV_P6_EMON_FUSED_UOPS_RET ||
 			     pe == PMC_EV_P6_EMON_KNI_COMP_INST_RET ||
 			     pe == PMC_EV_P6_EMON_KNI_INST_RETIRED ||
 			     pe == PMC_EV_P6_EMON_KNI_PREF_DISPATCHED ||
 			     pe == PMC_EV_P6_EMON_KNI_PREF_MISS ||
 			     pe == PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED ||
 			     pe == PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED ||
 			     pe == PMC_EV_P6_FP_MMX_TRANS)
 			    && (n > 1))	/* Only one mask keyword is allowed. */
 				return (-1);
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		} else if (KWMATCH(p, P6_KW_USR)) {
 			pmc_config->pm_caps |= PMC_CAP_USER;
 		} else
 			return (-1);
 	}
 
 	/* post processing */
 	switch (pe) {
 
 		/*
 		 * The following events default to an evmask of 0
 		 */
 
 		/* default => 'self' */
 	case PMC_EV_P6_BUS_DRDY_CLOCKS:
 	case PMC_EV_P6_BUS_LOCK_CLOCKS:
 	case PMC_EV_P6_BUS_TRAN_BRD:
 	case PMC_EV_P6_BUS_TRAN_RFO:
 	case PMC_EV_P6_BUS_TRANS_WB:
 	case PMC_EV_P6_BUS_TRAN_IFETCH:
 	case PMC_EV_P6_BUS_TRAN_INVAL:
 	case PMC_EV_P6_BUS_TRAN_PWR:
 	case PMC_EV_P6_BUS_TRANS_P:
 	case PMC_EV_P6_BUS_TRANS_IO:
 	case PMC_EV_P6_BUS_TRAN_DEF:
 	case PMC_EV_P6_BUS_TRAN_BURST:
 	case PMC_EV_P6_BUS_TRAN_ANY:
 	case PMC_EV_P6_BUS_TRAN_MEM:
 
 		/* default => 'nta' */
 	case PMC_EV_P6_EMON_KNI_PREF_DISPATCHED:
 	case PMC_EV_P6_EMON_KNI_PREF_MISS:
 
 		/* default => 'packed and scalar' */
 	case PMC_EV_P6_EMON_KNI_INST_RETIRED:
 	case PMC_EV_P6_EMON_KNI_COMP_INST_RET:
 
 		/* default => 'mmx to fp transitions' */
 	case PMC_EV_P6_FP_MMX_TRANS:
 
 		/* default => 'SSE Packed Single' */
 	case PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED:
 	case PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED:
 
 		/* default => 'all fused micro-ops' */
 	case PMC_EV_P6_EMON_FUSED_UOPS_RET:
 
 		/* default => 'all transitions' */
 	case PMC_EV_P6_EMON_EST_TRANS:
 		break;
 
 	case PMC_EV_P6_MMX_UOPS_EXEC:
 		evmask = 0x0F;		/* only value allowed */
 		break;
 
 	default:
 		/*
 		 * For all other events, set the default event mask
 		 * to a logical OR of all the allowed event mask bits.
 		 */
 		if (evmask == 0 && pmask) {
 			for (pm = pmask; pm->pm_name; pm++)
 				evmask |= pm->pm_value;
 			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
 		}
 
 		break;
 	}
 
 	if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
 		pmc_config->pm_md.pm_ppro.pm_ppro_config |=
 		    P6_EVSEL_TO_UMASK(evmask);
 
 	return (0);
 }
 
 #endif
 
 #if	defined(__i386__) || defined(__amd64__)
 static int
 tsc_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	if (pe != PMC_EV_TSC_TSC)
 		return (-1);
 
 	/* TSC events must be unqualified. */
 	if (ctrspec && *ctrspec != '\0')
 		return (-1);
 
 	pmc_config->pm_md.pm_amd.pm_amd_config = 0;
 	pmc_config->pm_caps |= PMC_CAP_READ;
 
 	return (0);
 }
 #endif
 
 /*
  * Match an event name `name' with its canonical form.
  *
  * Matches are case insensitive and spaces, periods, underscores and
  * hyphen characters are considered to match each other.
  *
  * Returns 1 for a match, 0 otherwise.
  */
 
 static int
 pmc_match_event_name(const char *name, const char *canonicalname)
 {
 	int cc, nc;
 	const unsigned char *c, *n;
 
 	c = (const unsigned char *) canonicalname;
 	n = (const unsigned char *) name;
 
 	for (; (nc = *n) && (cc = *c); n++, c++) {
 
 		if ((nc == ' ' || nc == '_' || nc == '-' || nc == '.') &&
 		    (cc == ' ' || cc == '_' || cc == '-' || cc == '.'))
 			continue;
 
 		if (toupper(nc) == toupper(cc))
 			continue;
 
 
 		return (0);
 	}
 
 	if (*n == '\0' && *c == '\0')
 		return (1);
 
 	return (0);
 }
 
 /*
  * Match an event name against all the event named supported by a
  * PMC class.
  *
  * Returns an event descriptor pointer on match or NULL otherwise.
  */
 static const struct pmc_event_descr *
 pmc_match_event_class(const char *name,
     const struct pmc_class_descr *pcd)
 {
 	size_t n;
 	const struct pmc_event_descr *ev;
 
 	ev = pcd->pm_evc_event_table;
 	for (n = 0; n < pcd->pm_evc_event_table_size; n++, ev++)
 		if (pmc_match_event_name(name, ev->pm_ev_name))
 			return (ev);
 
 	return (NULL);
 }
 
 static int
 pmc_mdep_is_compatible_class(enum pmc_class pc)
 {
 	size_t n;
 
 	for (n = 0; n < pmc_mdep_class_list_size; n++)
 		if (pmc_mdep_class_list[n] == pc)
 			return (1);
 	return (0);
 }
 
 /*
  * API entry points
  */
 
 int
 pmc_allocate(const char *ctrspec, enum pmc_mode mode,
     uint32_t flags, int cpu, pmc_id_t *pmcid)
 {
 	size_t n;
 	int retval;
 	char *r, *spec_copy;
 	const char *ctrname;
 	const struct pmc_event_descr *ev;
 	const struct pmc_event_alias *alias;
 	struct pmc_op_pmcallocate pmc_config;
 	const struct pmc_class_descr *pcd;
 
 	spec_copy = NULL;
 	retval    = -1;
 
 	if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
 	    mode != PMC_MODE_SC && mode != PMC_MODE_TC) {
 		errno = EINVAL;
 		goto out;
 	}
 
 	/* replace an event alias with the canonical event specifier */
 	if (pmc_mdep_event_aliases)
 		for (alias = pmc_mdep_event_aliases; alias->pm_alias; alias++)
 			if (!strcasecmp(ctrspec, alias->pm_alias)) {
 				spec_copy = strdup(alias->pm_spec);
 				break;
 			}
 
 	if (spec_copy == NULL)
 		spec_copy = strdup(ctrspec);
 
 	r = spec_copy;
 	ctrname = strsep(&r, ",");
 
 	/*
 	 * If a explicit class prefix was given by the user, restrict the
 	 * search for the event to the specified PMC class.
 	 */
 	ev = NULL;
 	for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) {
 		pcd = pmc_class_table[n];
 		if (pmc_mdep_is_compatible_class(pcd->pm_evc_class) &&
 		    strncasecmp(ctrname, pcd->pm_evc_name,
 				pcd->pm_evc_name_size) == 0) {
 			if ((ev = pmc_match_event_class(ctrname +
 			    pcd->pm_evc_name_size, pcd)) == NULL) {
 				errno = EINVAL;
 				goto out;
 			}
 			break;
 		}
 	}
 
 	/*
 	 * Otherwise, search for this event in all compatible PMC
 	 * classes.
 	 */
 	for (n = 0; ev == NULL && n < PMC_CLASS_TABLE_SIZE; n++) {
 		pcd = pmc_class_table[n];
 		if (pmc_mdep_is_compatible_class(pcd->pm_evc_class))
 			ev = pmc_match_event_class(ctrname, pcd);
 	}
 
 	if (ev == NULL) {
 		errno = EINVAL;
 		goto out;
 	}
 
 	bzero(&pmc_config, sizeof(pmc_config));
 	pmc_config.pm_ev    = ev->pm_ev_code;
 	pmc_config.pm_class = pcd->pm_evc_class;
 	pmc_config.pm_cpu   = cpu;
 	pmc_config.pm_mode  = mode;
 	pmc_config.pm_flags = flags;
 
 	if (PMC_IS_SAMPLING_MODE(mode))
 		pmc_config.pm_caps |= PMC_CAP_INTERRUPT;
 
  	if (pcd->pm_evc_allocate_pmc(ev->pm_ev_code, r, &pmc_config) < 0) {
 		errno = EINVAL;
 		goto out;
 	}
 
 	if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0)
 		goto out;
 
 	*pmcid = pmc_config.pm_pmcid;
 
 	retval = 0;
 
  out:
 	if (spec_copy)
 		free(spec_copy);
 
 	return (retval);
 }
 
 int
 pmc_attach(pmc_id_t pmc, pid_t pid)
 {
 	struct pmc_op_pmcattach pmc_attach_args;
 
 	pmc_attach_args.pm_pmc = pmc;
 	pmc_attach_args.pm_pid = pid;
 
 	return (PMC_CALL(PMCATTACH, &pmc_attach_args));
 }
 
 int
 pmc_capabilities(pmc_id_t pmcid, uint32_t *caps)
 {
 	unsigned int i;
 	enum pmc_class cl;
 
 	cl = PMC_ID_TO_CLASS(pmcid);
 	for (i = 0; i < cpu_info.pm_nclass; i++)
 		if (cpu_info.pm_classes[i].pm_class == cl) {
 			*caps = cpu_info.pm_classes[i].pm_caps;
 			return (0);
 		}
 	errno = EINVAL;
 	return (-1);
 }
 
 int
 pmc_configure_logfile(int fd)
 {
 	struct pmc_op_configurelog cla;
 
 	cla.pm_logfd = fd;
 	if (PMC_CALL(CONFIGURELOG, &cla) < 0)
 		return (-1);
 	return (0);
 }
 
 int
 pmc_cpuinfo(const struct pmc_cpuinfo **pci)
 {
 	if (pmc_syscall == -1) {
 		errno = ENXIO;
 		return (-1);
 	}
 
 	*pci = &cpu_info;
 	return (0);
 }
 
 int
 pmc_detach(pmc_id_t pmc, pid_t pid)
 {
 	struct pmc_op_pmcattach pmc_detach_args;
 
 	pmc_detach_args.pm_pmc = pmc;
 	pmc_detach_args.pm_pid = pid;
 	return (PMC_CALL(PMCDETACH, &pmc_detach_args));
 }
 
 int
 pmc_disable(int cpu, int pmc)
 {
 	struct pmc_op_pmcadmin ssa;
 
 	ssa.pm_cpu = cpu;
 	ssa.pm_pmc = pmc;
 	ssa.pm_state = PMC_STATE_DISABLED;
 	return (PMC_CALL(PMCADMIN, &ssa));
 }
 
 int
 pmc_enable(int cpu, int pmc)
 {
 	struct pmc_op_pmcadmin ssa;
 
 	ssa.pm_cpu = cpu;
 	ssa.pm_pmc = pmc;
 	ssa.pm_state = PMC_STATE_FREE;
 	return (PMC_CALL(PMCADMIN, &ssa));
 }
 
 /*
  * Return a list of events known to a given PMC class.  'cl' is the
  * PMC class identifier, 'eventnames' is the returned list of 'const
  * char *' pointers pointing to the names of the events. 'nevents' is
  * the number of event name pointers returned.
  *
  * The space for 'eventnames' is allocated using malloc(3).  The caller
  * is responsible for freeing this space when done.
  */
 int
 pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
     int *nevents)
 {
 	int count;
 	const char **names;
 	const struct pmc_event_descr *ev;
 
 	switch (cl)
 	{
 	case PMC_CLASS_IAF:
 		ev = iaf_event_table;
 		count = PMC_EVENT_TABLE_SIZE(iaf);
 		break;
 	case PMC_CLASS_IAP:
 		/*
 		 * Return the most appropriate set of event name
 		 * spellings for the current CPU.
 		 */
 		switch (cpu_info.pm_cputype) {
 		default:
 		case PMC_CPU_INTEL_ATOM:
 			ev = atom_event_table;
 			count = PMC_EVENT_TABLE_SIZE(atom);
 			break;
 		case PMC_CPU_INTEL_CORE:
 			ev = core_event_table;
 			count = PMC_EVENT_TABLE_SIZE(core);
 			break;
 		case PMC_CPU_INTEL_CORE2:
 		case PMC_CPU_INTEL_CORE2EXTREME:
 			ev = core2_event_table;
 			count = PMC_EVENT_TABLE_SIZE(core2);
 			break;
 		case PMC_CPU_INTEL_COREI7:
 			ev = corei7_event_table;
 			count = PMC_EVENT_TABLE_SIZE(corei7);
 			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmere_event_table;
+			count = PMC_EVENT_TABLE_SIZE(westmere);
+			break;
 		}
 		break;
+	case PMC_CLASS_UCF:
+		ev = ucf_event_table;
+		count = PMC_EVENT_TABLE_SIZE(ucf);
+		break;
+	case PMC_CLASS_UCP:
+		/*
+		 * Return the most appropriate set of event name
+		 * spellings for the current CPU.
+		 */
+		switch (cpu_info.pm_cputype) {
+		default:
+		case PMC_CPU_INTEL_COREI7:
+			ev = corei7uc_event_table;
+			count = PMC_EVENT_TABLE_SIZE(corei7uc);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmereuc_event_table;
+			count = PMC_EVENT_TABLE_SIZE(westmereuc);
+			break;
+		}
+		break;
 	case PMC_CLASS_TSC:
 		ev = tsc_event_table;
 		count = PMC_EVENT_TABLE_SIZE(tsc);
 		break;
 	case PMC_CLASS_K7:
 		ev = k7_event_table;
 		count = PMC_EVENT_TABLE_SIZE(k7);
 		break;
 	case PMC_CLASS_K8:
 		ev = k8_event_table;
 		count = PMC_EVENT_TABLE_SIZE(k8);
 		break;
 	case PMC_CLASS_P4:
 		ev = p4_event_table;
 		count = PMC_EVENT_TABLE_SIZE(p4);
 		break;
 	case PMC_CLASS_P5:
 		ev = p5_event_table;
 		count = PMC_EVENT_TABLE_SIZE(p5);
 		break;
 	case PMC_CLASS_P6:
 		ev = p6_event_table;
 		count = PMC_EVENT_TABLE_SIZE(p6);
 		break;
 	default:
 		errno = EINVAL;
 		return (-1);
 	}
 
 	if ((names = malloc(count * sizeof(const char *))) == NULL)
 		return (-1);
 
 	*eventnames = names;
 	*nevents = count;
 
 	for (;count--; ev++, names++)
 		*names = ev->pm_ev_name;
 	return (0);
 }
 
 int
 pmc_flush_logfile(void)
 {
 	return (PMC_CALL(FLUSHLOG,0));
 }
 
 int
 pmc_get_driver_stats(struct pmc_driverstats *ds)
 {
 	struct pmc_op_getdriverstats gms;
 
 	if (PMC_CALL(GETDRIVERSTATS, &gms) < 0)
 		return (-1);
 
 	/* copy out fields in the current userland<->library interface */
 	ds->pm_intr_ignored    = gms.pm_intr_ignored;
 	ds->pm_intr_processed  = gms.pm_intr_processed;
 	ds->pm_intr_bufferfull = gms.pm_intr_bufferfull;
 	ds->pm_syscalls        = gms.pm_syscalls;
 	ds->pm_syscall_errors  = gms.pm_syscall_errors;
 	ds->pm_buffer_requests = gms.pm_buffer_requests;
 	ds->pm_buffer_requests_failed = gms.pm_buffer_requests_failed;
 	ds->pm_log_sweeps      = gms.pm_log_sweeps;
 	return (0);
 }
 
 int
 pmc_get_msr(pmc_id_t pmc, uint32_t *msr)
 {
 	struct pmc_op_getmsr gm;
 
 	gm.pm_pmcid = pmc;
 	if (PMC_CALL(PMCGETMSR, &gm) < 0)
 		return (-1);
 	*msr = gm.pm_msr;
 	return (0);
 }
 
 int
 pmc_init(void)
 {
 	int error, pmc_mod_id;
 	unsigned int n;
 	uint32_t abi_version;
 	struct module_stat pmc_modstat;
 	struct pmc_op_getcpuinfo op_cpu_info;
 #if defined(__amd64__) || defined(__i386__)
 	int cpu_has_iaf_counters;
 	unsigned int t;
 #endif
 
 	if (pmc_syscall != -1) /* already inited */
 		return (0);
 
 	/* retrieve the system call number from the KLD */
 	if ((pmc_mod_id = modfind(PMC_MODULE_NAME)) < 0)
 		return (-1);
 
 	pmc_modstat.version = sizeof(struct module_stat);
 	if ((error = modstat(pmc_mod_id, &pmc_modstat)) < 0)
 		return (-1);
 
 	pmc_syscall = pmc_modstat.data.intval;
 
 	/* check the kernel module's ABI against our compiled-in version */
 	abi_version = PMC_VERSION;
 	if (PMC_CALL(GETMODULEVERSION, &abi_version) < 0)
 		return (pmc_syscall = -1);
 
 	/* ignore patch & minor numbers for the comparision */
 	if ((abi_version & 0xFF000000) != (PMC_VERSION & 0xFF000000)) {
 		errno  = EPROGMISMATCH;
 		return (pmc_syscall = -1);
 	}
 
 	if (PMC_CALL(GETCPUINFO, &op_cpu_info) < 0)
 		return (pmc_syscall = -1);
 
 	cpu_info.pm_cputype = op_cpu_info.pm_cputype;
 	cpu_info.pm_ncpu    = op_cpu_info.pm_ncpu;
 	cpu_info.pm_npmc    = op_cpu_info.pm_npmc;
 	cpu_info.pm_nclass  = op_cpu_info.pm_nclass;
 	for (n = 0; n < cpu_info.pm_nclass; n++)
 		cpu_info.pm_classes[n] = op_cpu_info.pm_classes[n];
 
 	pmc_class_table = malloc(PMC_CLASS_TABLE_SIZE *
 	    sizeof(struct pmc_class_descr *));
 
 	if (pmc_class_table == NULL)
 		return (-1);
 
 	for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++)
 		pmc_class_table[n] = NULL;
 
 	/*
 	 * Fill in the class table.
 	 */
 	n = 0;
 #if defined(__amd64__) || defined(__i386__)
 	pmc_class_table[n++] = &tsc_class_table_descr;
 
 	/*
  	 * Check if this CPU has fixed function counters.
 	 */
 	cpu_has_iaf_counters = 0;
 	for (t = 0; t < cpu_info.pm_nclass; t++)
 		if (cpu_info.pm_classes[t].pm_class == PMC_CLASS_IAF)
 			cpu_has_iaf_counters = 1;
 #endif
 
 #define	PMC_MDEP_INIT(C) do {					\
 		pmc_mdep_event_aliases    = C##_aliases;	\
 		pmc_mdep_class_list  = C##_pmc_classes;		\
 		pmc_mdep_class_list_size =			\
 		    PMC_TABLE_SIZE(C##_pmc_classes);		\
 	} while (0)
 
 #define	PMC_MDEP_INIT_INTEL_V2(C) do {					\
 		PMC_MDEP_INIT(C);					\
 		if (cpu_has_iaf_counters) 				\
 			pmc_class_table[n++] = &iaf_class_table_descr;	\
 		else							\
 			pmc_mdep_event_aliases =			\
 				C##_aliases_without_iaf;		\
 		pmc_class_table[n] = &C##_class_table_descr;		\
 	} while (0)
 
 	/* Configure the event name parser. */
 	switch (cpu_info.pm_cputype) {
 #if defined(__i386__)
 	case PMC_CPU_AMD_K7:
 		PMC_MDEP_INIT(k7);
 		pmc_class_table[n] = &k7_class_table_descr;
 		break;
 	case PMC_CPU_INTEL_P5:
 		PMC_MDEP_INIT(p5);
 		pmc_class_table[n]  = &p5_class_table_descr;
 		break;
 	case PMC_CPU_INTEL_P6:		/* P6 ... Pentium M CPUs have */
 	case PMC_CPU_INTEL_PII:		/* similar PMCs. */
 	case PMC_CPU_INTEL_PIII:
 	case PMC_CPU_INTEL_PM:
 		PMC_MDEP_INIT(p6);
 		pmc_class_table[n] = &p6_class_table_descr;
 		break;
 #endif
 #if defined(__amd64__) || defined(__i386__)
 	case PMC_CPU_AMD_K8:
 		PMC_MDEP_INIT(k8);
 		pmc_class_table[n] = &k8_class_table_descr;
 		break;
 	case PMC_CPU_INTEL_ATOM:
 		PMC_MDEP_INIT_INTEL_V2(atom);
 		break;
 	case PMC_CPU_INTEL_CORE:
 		PMC_MDEP_INIT(core);
 		pmc_class_table[n] = &core_class_table_descr;
 		break;
 	case PMC_CPU_INTEL_CORE2:
 	case PMC_CPU_INTEL_CORE2EXTREME:
 		PMC_MDEP_INIT_INTEL_V2(core2);
 		break;
 	case PMC_CPU_INTEL_COREI7:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &corei7uc_class_table_descr;
 		PMC_MDEP_INIT_INTEL_V2(corei7);
 		break;
+	case PMC_CPU_INTEL_WESTMERE:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &westmereuc_class_table_descr;
+		PMC_MDEP_INIT_INTEL_V2(westmere);
+		break;
 	case PMC_CPU_INTEL_PIV:
 		PMC_MDEP_INIT(p4);
 		pmc_class_table[n] = &p4_class_table_descr;
 		break;
 #endif
 
 
 	default:
 		/*
 		 * Some kind of CPU this version of the library knows nothing
 		 * about.  This shouldn't happen since the abi version check
 		 * should have caught this.
 		 */
 		errno = ENXIO;
 		return (pmc_syscall = -1);
 	}
 
 	return (0);
 }
 
 const char *
 pmc_name_of_capability(enum pmc_caps cap)
 {
 	int i;
 
 	/*
 	 * 'cap' should have a single bit set and should be in
 	 * range.
 	 */
 	if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST ||
 	    cap > PMC_CAP_LAST) {
 		errno = EINVAL;
 		return (NULL);
 	}
 
 	i = ffs(cap);
 	return (pmc_capability_names[i - 1]);
 }
 
 const char *
 pmc_name_of_class(enum pmc_class pc)
 {
 	if ((int) pc >= PMC_CLASS_FIRST &&
 	    pc <= PMC_CLASS_LAST)
 		return (pmc_class_names[pc]);
 
 	errno = EINVAL;
 	return (NULL);
 }
 
 const char *
 pmc_name_of_cputype(enum pmc_cputype cp)
 {
 	size_t n;
 
 	for (n = 0; n < PMC_TABLE_SIZE(pmc_cputype_names); n++)
 		if (cp == pmc_cputype_names[n].pm_cputype)
 			return (pmc_cputype_names[n].pm_name);
 
 	errno = EINVAL;
 	return (NULL);
 }
 
 const char *
 pmc_name_of_disposition(enum pmc_disp pd)
 {
 	if ((int) pd >= PMC_DISP_FIRST &&
 	    pd <= PMC_DISP_LAST)
 		return (pmc_disposition_names[pd]);
 
 	errno = EINVAL;
 	return (NULL);
 }
 
 const char *
 _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu)
 {
 	const struct pmc_event_descr *ev, *evfence;
 
 	ev = evfence = NULL;
 	if (pe >= PMC_EV_IAF_FIRST && pe <= PMC_EV_IAF_LAST) {
 		ev = iaf_event_table;
 		evfence = iaf_event_table + PMC_EVENT_TABLE_SIZE(iaf);
 	} else if (pe >= PMC_EV_IAP_FIRST && pe <= PMC_EV_IAP_LAST) {
 		switch (cpu) {
 		case PMC_CPU_INTEL_ATOM:
 			ev = atom_event_table;
 			evfence = atom_event_table + PMC_EVENT_TABLE_SIZE(atom);
 			break;
 		case PMC_CPU_INTEL_CORE:
 			ev = core_event_table;
 			evfence = core_event_table + PMC_EVENT_TABLE_SIZE(core);
 			break;
 		case PMC_CPU_INTEL_CORE2:
 		case PMC_CPU_INTEL_CORE2EXTREME:
 			ev = core2_event_table;
 			evfence = core2_event_table + PMC_EVENT_TABLE_SIZE(core2);
 			break;
 		case PMC_CPU_INTEL_COREI7:
 			ev = corei7_event_table;
 			evfence = corei7_event_table + PMC_EVENT_TABLE_SIZE(corei7);
 			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmere_event_table;
+			evfence = westmere_event_table + PMC_EVENT_TABLE_SIZE(westmere);
+			break;
 		default:	/* Unknown CPU type. */
 			break;
 		}
-	} if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
+	} else if (pe >= PMC_EV_UCF_FIRST && pe <= PMC_EV_UCF_LAST) {
+		ev = ucf_event_table;
+		evfence = ucf_event_table + PMC_EVENT_TABLE_SIZE(ucf);
+	} else if (pe >= PMC_EV_UCP_FIRST && pe <= PMC_EV_UCP_LAST) {
+		switch (cpu) {
+		case PMC_CPU_INTEL_COREI7:
+			ev = corei7uc_event_table;
+			evfence = corei7uc_event_table + PMC_EVENT_TABLE_SIZE(corei7uc);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmereuc_event_table;
+			evfence = westmereuc_event_table + PMC_EVENT_TABLE_SIZE(westmereuc);
+			break;
+		default:	/* Unknown CPU type. */
+			break;
+		}
+	} else if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
 		ev = k7_event_table;
 		evfence = k7_event_table + PMC_EVENT_TABLE_SIZE(k7);
 	} else if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {
 		ev = k8_event_table;
 		evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8);
 	} else if (pe >= PMC_EV_P4_FIRST && pe <= PMC_EV_P4_LAST) {
 		ev = p4_event_table;
 		evfence = p4_event_table + PMC_EVENT_TABLE_SIZE(p4);
 	} else if (pe >= PMC_EV_P5_FIRST && pe <= PMC_EV_P5_LAST) {
 		ev = p5_event_table;
 		evfence = p5_event_table + PMC_EVENT_TABLE_SIZE(p5);
 	} else if (pe >= PMC_EV_P6_FIRST && pe <= PMC_EV_P6_LAST) {
 		ev = p6_event_table;
 		evfence = p6_event_table + PMC_EVENT_TABLE_SIZE(p6);
 	} else if (pe == PMC_EV_TSC_TSC) {
 		ev = tsc_event_table;
 		evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc);
 	}
 
 	for (; ev != evfence; ev++)
 		if (pe == ev->pm_ev_code)
 			return (ev->pm_ev_name);
 
 	return (NULL);
 }
 
 const char *
 pmc_name_of_event(enum pmc_event pe)
 {
 	const char *n;
 
 	if ((n = _pmc_name_of_event(pe, cpu_info.pm_cputype)) != NULL)
 		return (n);
 
 	errno = EINVAL;
 	return (NULL);
 }
 
 const char *
 pmc_name_of_mode(enum pmc_mode pm)
 {
 	if ((int) pm >= PMC_MODE_FIRST &&
 	    pm <= PMC_MODE_LAST)
 		return (pmc_mode_names[pm]);
 
 	errno = EINVAL;
 	return (NULL);
 }
 
 const char *
 pmc_name_of_state(enum pmc_state ps)
 {
 	if ((int) ps >= PMC_STATE_FIRST &&
 	    ps <= PMC_STATE_LAST)
 		return (pmc_state_names[ps]);
 
 	errno = EINVAL;
 	return (NULL);
 }
 
 int
 pmc_ncpu(void)
 {
 	if (pmc_syscall == -1) {
 		errno = ENXIO;
 		return (-1);
 	}
 
 	return (cpu_info.pm_ncpu);
 }
 
 int
 pmc_npmc(int cpu)
 {
 	if (pmc_syscall == -1) {
 		errno = ENXIO;
 		return (-1);
 	}
 
 	if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) {
 		errno = EINVAL;
 		return (-1);
 	}
 
 	return (cpu_info.pm_npmc);
 }
 
 int
 pmc_pmcinfo(int cpu, struct pmc_pmcinfo **ppmci)
 {
 	int nbytes, npmc;
 	struct pmc_op_getpmcinfo *pmci;
 
 	if ((npmc = pmc_npmc(cpu)) < 0)
 		return (-1);
 
 	nbytes = sizeof(struct pmc_op_getpmcinfo) +
 	    npmc * sizeof(struct pmc_info);
 
 	if ((pmci = calloc(1, nbytes)) == NULL)
 		return (-1);
 
 	pmci->pm_cpu  = cpu;
 
 	if (PMC_CALL(GETPMCINFO, pmci) < 0) {
 		free(pmci);
 		return (-1);
 	}
 
 	/* kernel<->library, library<->userland interfaces are identical */
 	*ppmci = (struct pmc_pmcinfo *) pmci;
 	return (0);
 }
 
 int
 pmc_read(pmc_id_t pmc, pmc_value_t *value)
 {
 	struct pmc_op_pmcrw pmc_read_op;
 
 	pmc_read_op.pm_pmcid = pmc;
 	pmc_read_op.pm_flags = PMC_F_OLDVALUE;
 	pmc_read_op.pm_value = -1;
 
 	if (PMC_CALL(PMCRW, &pmc_read_op) < 0)
 		return (-1);
 
 	*value = pmc_read_op.pm_value;
 	return (0);
 }
 
 int
 pmc_release(pmc_id_t pmc)
 {
 	struct pmc_op_simple	pmc_release_args;
 
 	pmc_release_args.pm_pmcid = pmc;
 	return (PMC_CALL(PMCRELEASE, &pmc_release_args));
 }
 
 int
 pmc_rw(pmc_id_t pmc, pmc_value_t newvalue, pmc_value_t *oldvaluep)
 {
 	struct pmc_op_pmcrw pmc_rw_op;
 
 	pmc_rw_op.pm_pmcid = pmc;
 	pmc_rw_op.pm_flags = PMC_F_NEWVALUE | PMC_F_OLDVALUE;
 	pmc_rw_op.pm_value = newvalue;
 
 	if (PMC_CALL(PMCRW, &pmc_rw_op) < 0)
 		return (-1);
 
 	*oldvaluep = pmc_rw_op.pm_value;
 	return (0);
 }
 
 int
 pmc_set(pmc_id_t pmc, pmc_value_t value)
 {
 	struct pmc_op_pmcsetcount sc;
 
 	sc.pm_pmcid = pmc;
 	sc.pm_count = value;
 
 	if (PMC_CALL(PMCSETCOUNT, &sc) < 0)
 		return (-1);
 	return (0);
 }
 
 int
 pmc_start(pmc_id_t pmc)
 {
 	struct pmc_op_simple	pmc_start_args;
 
 	pmc_start_args.pm_pmcid = pmc;
 	return (PMC_CALL(PMCSTART, &pmc_start_args));
 }
 
 int
 pmc_stop(pmc_id_t pmc)
 {
 	struct pmc_op_simple	pmc_stop_args;
 
 	pmc_stop_args.pm_pmcid = pmc;
 	return (PMC_CALL(PMCSTOP, &pmc_stop_args));
 }
 
 int
 pmc_width(pmc_id_t pmcid, uint32_t *width)
 {
 	unsigned int i;
 	enum pmc_class cl;
 
 	cl = PMC_ID_TO_CLASS(pmcid);
 	for (i = 0; i < cpu_info.pm_nclass; i++)
 		if (cpu_info.pm_classes[i].pm_class == cl) {
 			*width = cpu_info.pm_classes[i].pm_width;
 			return (0);
 		}
 	errno = EINVAL;
 	return (-1);
 }
 
 int
 pmc_write(pmc_id_t pmc, pmc_value_t value)
 {
 	struct pmc_op_pmcrw pmc_write_op;
 
 	pmc_write_op.pm_pmcid = pmc;
 	pmc_write_op.pm_flags = PMC_F_NEWVALUE;
 	pmc_write_op.pm_value = value;
 	return (PMC_CALL(PMCRW, &pmc_write_op));
 }
 
 int
 pmc_writelog(uint32_t userdata)
 {
 	struct pmc_op_writelog wl;
 
 	wl.pm_userdata = userdata;
 	return (PMC_CALL(WRITELOG, &wl));
 }
Index: stable/8/lib/libpmc/pmc.corei7.3
===================================================================
--- stable/8/lib/libpmc/pmc.corei7.3	(nonexistent)
+++ stable/8/lib/libpmc/pmc.corei7.3	(revision 206702)
@@ -0,0 +1,1581 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.COREI7 3
+.Sh NAME
+.Nm pmc.corei7
+.Nd measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developes Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss COREI7 AND XEON 5500 PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and was serviced
+by forwarded data following a cross package snoop where no modified
+copies found. (Remote home requests are not counted)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+Counts the number of store buffer drains.
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.PDP_MISS
+.Pq Event 08H , Umask 40H
+Number of DTLB cache load misses where the high part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 08H , Umask 80H
+Counts number of completed large page walks due to load miss in the STLB.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not counter
+prefetches. Counts both primary and secondary misses to the TLB
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops issued by the Register Allocation Table
+to the Reservation Station, i.e. the UOPs issued from the front end to the
+back end.
+set invert=1, cmask = 1
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.L3_DATA_MISS_UNKNOWN
+.Pq Event 0FH , Umask 01H
+Counts number of memory load instructions retired where the memory reference
+missed L3 and data source is unknown.
+Available only for CPUID signature 06_2EH
+.It Li MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM
+.Pq Event 0FH , Umask 02H
+Counts number of memory load instructions retired where the memory reference
+hit modified data in a sibling core residing on the same socket.
+.It Li MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT
+.Pq Event 0FH , Umask 08H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and HIT in a remote socket's cache. Only
+counts locally homed lines.
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 10H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and was remotely homed. This includes both
+DRAM access and HITM in a remote socket's cache for remotely homed lines.
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 20H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and required a local socket memory
+reference. This includes locally homed cachelines that were in a modified
+state in another socket.
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and to perform I/O.
+Available only for CPUID signature 06_2EH
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge =1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect When SMT is on.
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect When SMT is on
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed. In such case,
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Counts number of loops that cant stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e, a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch,.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests.The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 4FH
+This event counts requests originating from the core that reference a cache
+line in the last level cache. The event count includes speculative traffic
+but excludes cache line fills due to a L2 hardware-prefetch. Because cache
+hierarchy, cache sizes and other implementation-specific characteristics;
+value comparison to estimate performance differences is not recommended.
+see Table A-1
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+This event counts each cache miss condition for references to the last level
+cache. The event count may include speculative traffic but excludes cache
+line fills due to L2 hardware-prefetches. Because cache hierarchy, cache
+sizes and other implementation-specific characteristics; value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li L1D_CACHE_LD.I_STATE
+.Pq Event 40H , Umask 01H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the I (invalid) state, i.e. the read request missed the cache.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.S_STATE
+.Pq Event 40H , Umask 02H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.E_STATE
+.Pq Event 40H , Umask 04H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.M_STATE
+.Pq Event 40H , Umask 08H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.MESI
+.Pq Event 40H , Umask 0FH
+Counts L1 data cache read requests.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.S_STATE
+.Pq Event 41H , Umask 02H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.E_STATE
+.Pq Event 41H , Umask 04H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.M_STATE
+.Pq Event 41H , Umask 08H
+Counts L1 data cache store RFO requests where cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.HIT
+.Pq Event 42H , Umask 01H
+Counts retired load locks that hit in the L1 data cache or hit in an already
+allocated fill buffer.	The lock portion of the load lock transaction must
+hit in the L1D.
+The initial load will pull the lock into the L1 data cache. Counter 0, 1
+only
+.It Li L1D_CACHE_LOCK.S_STATE
+.Pq Event 42H , Umask 02H
+Counts L1 data cache retired load locks that hit the target cache line in
+the shared state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.E_STATE
+.Pq Event 42H , Umask 04H
+Counts L1 data cache retired load locks that hit the target cache line in
+the exclusive state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.M_STATE
+.Pq Event 42H , Umask 08H
+Counts L1 data cache retired load locks that hit the target cache line in
+the modified state.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.ANY
+.Pq Event 43H , Umask 01H
+Counts all references (uncached, speculated and retired) to the L1 data
+cache, including all loads and stores with any memory types. The event
+counts memory accesses only when they are actually performed. For example, a
+load blocked by unknown store address and later performed is only counted
+once.
+The event does not include non- memory accesses, such as I/O accesses.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.CACHEABLE
+.Pq Event 43H , Umask 02H
+Counts all data reads and writes (speculated and retired) from cacheable
+memory, including locked operations.
+Counter 0, 1 only
+.It Li L1D_PEND_MISS.LOAD_BUFFERS_FULL
+.Pq Event 48H , Umask 02H
+Counts cycles of L1 data cache load fill buffers full.
+Counter 0, 1 only
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which causes a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D. A streamer, which predicts lines sequentially after
+this one should be fetched, and the IP prefetcher that remembers access
+patterns for the current instruction. The streamer prefetcher stops on an
+L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS
+.Pq Event 4EH , Umask 04H
+Counts number of prefetch requests triggered by the Finite State Machine and
+pushed into the prefetch FIFO. Some of the prefetch requests are dropped due
+to overwrites or competition between the IP index prefetcher and streamer
+prefetcher. The prefetch FIFO contains 4 entries.
+.It Li L1D.REPL
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_REPL
+.Pq Event 51H , Umask 02H
+Counts the number of modified lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_EVICT
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to
+replacement.
+Counter 0, 1 only
+.It Li L1D.M_SNOOP_EVICT
+.Pq Event 51H , Umask 08H
+Counts the number of modified lines evicted from the L1 data cache due to
+snoop HITM intervention.
+Counter 0, 1 only
+.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT
+.Pq Event 52H , Umask 01H
+Counts the number of cacheable load lock speculated instructions accepted
+into the fill buffer.
+.It Li L1D_CACHE_LOCK_FB_HIT
+.Pq Event 53H , Umask 01H
+Counts the number of cacheable load lock speculated or retired instructions
+accepted into the fill buffer.
+.It Li CACHE_LOCK_CYCLES.L1D_L2
+.Pq Event 63H , Umask 01H
+Cycle count during which the L1D and L2 are locked. A lock is asserted when
+there is a locked memory access, due to uncacheable memory, a locked
+operation that spans two cache lines, or a page walk from an uncacheable
+page table.
+Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and
+it is highly recommended to avoid such accesses.
+.It Li CACHE_LOCK_CYCLES.L1D
+.Pq Event 63H , Umask 02H
+Counts the number of cycles that cacheline in the L1 data cache unit is
+locked.
+Counter 0, 1 only.
+.It Li IO_TRANSACTIONS
+.Pq Event 6CH , Umask 01H
+Counts the number of completed I/O transactions.
+.It Li L1I.HITS
+.Pq Event 80H , Umask 01H
+Counts all instruction fetches that hit the L1 instruction cache.
+.It Li L1I.MISSES
+.Pq Event 80H , Umask 02H
+Counts all instruction fetches that miss the L1I cache. This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to a L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which causes a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Brand Prediction Unit (PBU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a rear return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed, (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls exeucted, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed and stalls arising while store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire.
+When RS is full, new instructions can not enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline, (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re- order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring to close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediciton direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by loop stream detector
+Use cmask=1 and invert to count cycles
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that where issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts cycles when the Uops executed were issued from any ports except port
+5. Use Cmask=1 for active cycles; Cmask=0 for weighted cycles; Use CMask=1,
+Invert=1 to count P0-4 stalled cycles Use Cmask=1, Edge=1, Invert=1 to count
+P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that where issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts cycles when the Uops are executing. Use Cmask=1 for active cycles;
+Cmask=0 for weighted cycles; Use CMask=1, Invert=1 to count P0-4 stalled
+cycles Use Cmask=1, Edge=1, Invert=1 to count P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that where issued on port 0, 1, or 5.
+use cmask=1, invert=1 to count stall cycles
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that where issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Requires programming MSR 01A6H
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Requires programming MSR 01A7H
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of MMX instructions retired:.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired, (macro-fused=1, micro- fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a sever penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD calar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double- precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean or modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded, (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push , pop , call
+, ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: Cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe. Cycles when partial register stalls occurred Cycles when
+flag stalls occurred Cycles floating-point unit (FPU) status word stalls
+occurred. To count each of these conditions separately use the events:
+RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front-end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediciton Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline. The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The PBU clear leads to a 3 cycle bubble in the Front End.
+.It Li BPU_CLEARS.ANY
+.Pq Event E8H , Umask 03H
+Counts all BPU clears.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions, (Denormal input when the DAZ flag is off or Underflow
+result when the FTZ flag is off): x87 instructions, (NaN or denormal are
+loaded to a register or used as input from memory, Division by 0 or
+Underflow output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assist when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assist when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SID integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SID integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SID integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SID integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SID integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SID integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SID integer 64 bit shift or move operations.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs support the following events as
+June 2009 document (removed in December 2009):
+.Bl -tag -width indent
+.It Li SB_FORWARD.ANY
+.Pq Event 02H , Umask 01H
+Counts the number of store forwards.
+.It Li LOAD_BLOCK.STD
+.Pq Event 03H , Umask 01H
+Counts the number of loads blocked by a preceding store with unknown data.
+.It Li LOAD_BLOCK.ADDRESS_OFFSET
+.Pq Event 03H , Umask 04H
+Counts the number of loads blocked by a preceding store address.
+.It Li LOAD_BLOCK.ADDRESS_OFFSET
+.Pq Event 01H , Umask 04H
+Counts the cycles of store buffer drains.
+.It Li MISALIGN_MEM_REF.LOAD
+.Pq Event 05H , Umask 01H
+Counts the number of misaligned load references
+.It Li MISALIGN_MEM_REF.STORE
+.Pq Event 05H , Umask 02H
+Counts the number of misaligned store references
+.It Li MISALIGN_MEM_REF.ANY
+.Pq Event 05H , Umask 03H
+Counts the number of misaligned memory references
+.It Li STORE_BLOCKS.NOT_STA
+.Pq Event 06H , Umask 01H
+This event counts the number of load operations delayed caused by preceding
+stores whose addresses are known but whose data is unknown, and preceding
+stores that conflict with the load but which incompletely overlap the load.
+.It Li STORE_BLOCKS.STA
+.Pq Event 06H , Umask 02H
+This event counts load operations delayed caused by preceding stores whose
+addresses are unknown (STA block).
+.It Li STORE_BLOCKS.ANY
+.Pq Event 06H , Umask 0FH
+All loads delayed due to store blocks
+.It Li MEMORY_DISAMBIGURATION.RESET
+.Pq Event 09H , Umask 01H
+Counts memory disambiguration reset cycles
+.It Li MEMORY_DISAMBIGURATION.SUCCESS
+.Pq Event 09H , Umask 02H
+Counts the number of loads that memory disambiguration succeeded
+.It Li MEMORY_DISAMBIGURATION.WATCHDOG
+.Pq Event 09H , Umask 04H
+Counts the number of times the memory disambiguration watchdog kicked in.
+.It Li MEMORY_DISAMBIGURATION.WATCH_CYCLES
+.Pq Event 09H , Umask 08H
+Counts the cycles that the memory disambiguration watchdog is active.
+set invert=1, cmask = 1
+.It Li HW_INT.RCV
+.Pq Event 1DH , Umask 01H
+Number of interrupt received
+.It Li HW_INT.CYCLES_MASKED
+.Pq Event 1DH , Umask 02H
+Number of cycles interrupt are masked
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 1DH , Umask 04H
+Number of cycles interrupts are pending and masked
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 04H , Umask 04H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the E (exclusive) state. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 27H , Umask 04H
+LONGEST_LAT_CACH E.MISS
+.It Li UOPS_DECODED.DEC0
+.Pq Event 3DH , Umask 01H
+Counts micro-ops decoded by decoder 0.
+.It Li UOPS_DECODED.DEC0
+.Pq Event 01H , Umask 01H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the I state.
+Counter 0, 1 only
+.It Li 0FH
+.Pq Event 41H , Umask 41H
+L1D_CACHE_ST.MESI
+Counts L1 data cache store RFO requests.
+Counter 0, 1 only
+.It Li DTLB_MISSES.PDE_MISS
+.Pq Event 49H , Umask 20H
+Number of DTLB cache misses where the low part of the linear to physical
+address translation was missed.
+.It Li DTLB_MISSES.PDP_MISS
+.Pq Event 49H , Umask 40H
+Number of DTLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li SSE_MEM_EXEC.NTA
+.Pq Event 4BH , Umask 01H
+Counts number of SSE NTA prefetch/weakly-ordered instructions which missed
+the L1 data cache.
+.It Li SSE_MEM_EXEC.STREAMING_STORES
+.Pq Event 4BH , Umask 08H
+Counts number of SSE non temporal stores
+.It Li SFENCE_CYCLES
+.Pq Event 4DH , Umask 01H
+Counts store fence cycles
+.It Li EPT.EPDE_MISS
+.Pq Event 4FH , Umask 02H
+Counts Extended Page Directory Entry misses. The Extended Page Directory
+cache is used by Virtual Machine operating systems while the guest operating
+systems use the standard TLB caches.
+.It Li EPT.EPDPE_HIT
+.Pq Event 4FH , Umask 04H
+Counts Extended Page Directory Pointer Entry hits.
+.It Li EPT.EPDPE_MISS
+.Pq Event 4FH , Umask 08H
+Counts Extended Page Directory Pointer Entry misses. T
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Include L2
+prefetch requests.
+counter 0
+.It Li IFU_IVC.FULL
+.Pq Event 81H , Umask 01H
+Instruction Fetche unit victim cache full.
+.It Li IFU_IVC.L1I_EVICTION
+.Pq Event 81H , Umask 02H
+L1 Instruction cache evictions.
+.It Li L1I_OPPORTUNISTIC_HITS
+.Pq Event 83H , Umask 01H
+Opportunistic hits in streaming.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.PMH_BUSY_CYCLES
+.Pq Event 85H , Umask 04H
+Counts PMH busy cycles.
+.It Li ITLB_MISSES.STLB_HIT
+.Pq Event 85H , Umask 10H
+Counts the number of ITLB misses that hit in the second level TLB.
+.It Li ITLB_MISSES.PDE_MISS
+.Pq Event 85H , Umask 20H
+Number of ITLB misses where the low part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.PDP_MISS
+.Pq Event 85H , Umask 40H
+Number of ITLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 01H , Umask 80H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.UNCACHED_MEM
+.Pq Event B0H , Umask 20H
+Counts number of offcore uncached memory requests.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event BAH , Umask 04H
+Counts number of TPR reads
+.It Li PIC_ACCESSES.TPR_WRITES
+.Pq Event BAH , Umask 02H
+Counts number of TPR writes
+one or two micro-ops. Some instructions are decoded into longer sequences
+.It Li MACHINE_CLEARS.FUSION_ASSIST
+.Pq Event C3H , Umask 10H
+Counts the number of macro-fusion assists
+Counts SIMD packed single- precision floating point Uops retired.
+.It Li BOGUS_BR
+.Pq Event E4H , Umask 01H
+Counts the number of bogus branches.
+.It Li L2_HW_PREFETCH.HIT
+.Pq Event F3H , Umask 01H
+Count L2 HW prefetcher detector hits
+.It Li L2_HW_PREFETCH.ALLOC
+.Pq Event F3H , Umask 02H
+Count L2 HW prefetcher allocations
+.It Li L2_HW_PREFETCH.DATA_TRIGGER
+.Pq Event F3H , Umask 04H
+Count L2 HW data prefetcher triggered
+.It Li L2_HW_PREFETCH.CODE_TRIGGER
+.Pq Event F3H , Umask 08H
+Count L2 HW code prefetcher triggered
+.It Li L2_HW_PREFETCH.DCA_TRIGGER
+.Pq Event F3H , Umask 10H
+Count L2 HW DCA prefetcher triggered
+.It Li L2_HW_PREFETCH.KICK_START
+.Pq Event F3H , Umask 20H
+Count L2 HW prefetcher kick started
+.It Li SQ_MISC.PROMOTION
+.Pq Event F4H , Umask 01H
+Counts the number of L2 secondary misses that hit the Super Queue.
+.It Li SQ_MISC.PROMOTION_POST_GO
+.Pq Event F4H , Umask 02H
+Counts the number of L2 secondary misses during the Super Queue filling L2.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.FILL_DROPPED
+.Pq Event F4H , Umask 08H
+Counts the number of SQ L2 fills dropped due to L2 busy.
+.It Li SEGMENT_REG_LOADS
+.Pq Event F8H , Umask 01H
+Counts number of segment register loads.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .

Property changes on: stable/8/lib/libpmc/pmc.corei7.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/8/lib/libpmc/pmc.corei7uc.3
===================================================================
--- stable/8/lib/libpmc/pmc.corei7uc.3	(nonexistent)
+++ stable/8/lib/libpmc/pmc.corei7uc.3	(revision 206702)
@@ -0,0 +1,880 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.COREI7UC 3
+.Sh NAME
+.Nm pmc.corei7uc
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain 2 classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developes Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
+.Ss COREI7 AND XEON 5500 UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles were Global Queue read tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles were Global Queue write tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles were Global Queue peer probe tracker has at least one valid
+entry. The peer probe tracker queue tracks IOH and remote socket snoops.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of tread tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by the count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number GQ read tracker entries for which a full cache line read
+has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
+divided by this count to obtain the average cache line read L3 miss latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID.	The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID.	The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by the this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when the line was forwarded in M state
+is forwarded due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy.
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is
+busy.
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker is
+busy.
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number or requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_NORMAL_FULL.READ.CH0
+.Pq Event 27H , Umask 01H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH1
+.Pq Event 27H , Umask 02H
+Uncore cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH2
+.Pq Event 27H , Umask 04H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH0
+.Pq Event 27H , Umask 08H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH1
+.Pq Event 27H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH2
+.Pq Event 27H , Umask 20H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1
+.Pq Event 29H , Umask 02H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 1.
+.It Li QMC_BUSY.READ.CH2
+.Pq Event 29H , Umask 04H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 2.
+.It Li QMC_BUSY.WRITE.CH0
+.Pq Event 29H , Umask 08H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 0.
+.It Li QMC_BUSY.WRITE.CH1
+.Pq Event 29H , Umask 10H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 1.
+.It Li QMC_BUSY.WRITE.CH2
+.Pq Event 29H , Umask 20H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 2.
+.It Li QMC_OCCUPANCY.CH0
+.Pq Event 2AH , Umask 01H
+IMC channel 0 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH1
+.Pq Event 2AH , Umask 02H
+IMC channel 1 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH2
+.Pq Event 2AH , Umask 04H
+IMC channel 2 normal read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH0
+.Pq Event 2BH , Umask 01H
+IMC channel 0 issoc read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH1
+.Pq Event 2BH , Umask 02H
+IMC channel 1 issoc read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH2
+.Pq Event 2BH , Umask 04H
+IMC channel 2 issoc read request occupancy.
+.It Li QMC_ISSOC_READS.ANY
+.Pq Event 2BH , Umask 07H
+IMC issoc read request occupancy.
+.It Li QMC_NORMAL_READS.CH0
+.Pq Event 2CH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 medium and low
+priority read requests. The QMC channel 0 normal read occupancy divided by
+this count provides the average QMC channel 0 read latency.
+.It Li QMC_NORMAL_READS.CH1
+.Pq Event 2CH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 medium and low
+priority read requests. The QMC channel 1 normal read occupancy divided by
+this count provides the average QMC channel 1 read latency.
+.It Li QMC_NORMAL_READS.CH2
+.Pq Event 2CH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 medium and low
+priority read requests. The QMC channel 2 normal read occupancy divided by
+this count provides the average QMC channel 2 read latency.
+.It Li QMC_NORMAL_READS.ANY
+.Pq Event 2CH , Umask 07H
+Counts the number of Quickpath Memory Controller medium and low priority
+read requests. The QMC normal read occupancy divided by this count provides
+the average QMC read latency.
+.It Li QMC_HIGH_PRIORITY_READS.CH0
+.Pq Event 2DH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH1
+.Pq Event 2DH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH2
+.Pq Event 2DH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.ANY
+.Pq Event 2DH , Umask 07H
+Counts the number of Quickpath Memory Controller high priority isochronous
+read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH0
+.Pq Event 2EH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH1
+.Pq Event 2EH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2
+.Pq Event 2EH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.ANY
+.Pq Event 2EH , Umask 07H
+Counts the number of Quickpath Memory Controller critical priority
+isochronous read requests.
+.It Li QMC_WRITES.FULL.CH0
+.Pq Event 2FH , Umask 01H
+Counts number of full cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.FULL.CH1
+.Pq Event 2FH , Umask 02H
+Counts number of full cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.FULL.CH2
+.Pq Event 2FH , Umask 04H
+Counts number of full cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.FULL.ANY
+.Pq Event 2FH , Umask 07H
+Counts number of full cache line writes to DRAM.
+.It Li QMC_WRITES.PARTIAL.CH0
+.Pq Event 2FH , Umask 08H
+Counts number of partial cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.PARTIAL.CH1
+.Pq Event 2FH , Umask 10H
+Counts number of partial cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.PARTIAL.CH2
+.Pq Event 2FH , Umask 20H
+Counts number of partial cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.PARTIAL.ANY
+.Pq Event 2FH , Umask 38H
+Counts number of partial cache line writes to DRAM.
+.It Li QMC_CANCEL.CH0
+.Pq Event 30H , Umask 01H
+Counts number of DRAM channel 0 cancel requests.
+.It Li QMC_CANCEL.CH1
+.Pq Event 30H , Umask 02H
+Counts number of DRAM channel 1 cancel requests.
+.It Li QMC_CANCEL.CH2
+.Pq Event 30H , Umask 04H
+Counts number of DRAM channel 2 cancel requests.
+.It Li QMC_CANCEL.ANY
+.Pq Event 30H , Umask 07H
+Counts number of DRAM cancel requests.
+.It Li QMC_PRIORITY_UPDATES.CH0
+.Pq Event 31H , Umask 01H
+Counts number of DRAM channel 0 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH1
+.Pq Event 31H , Umask 02H
+Counts number of DRAM channel 1 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH2
+.Pq Event 31H , Umask 04H
+Counts number of DRAM channel 2 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.ANY
+.Pq Event 31H , Umask 07H
+Counts number of DRAM priority updates. A priority update occurs when an
+ISOC high or critical request is received by the QHL and there is a matching
+request with normal priority that has already been issued to the QMC. In
+this instance, the QHL will send a priority update to QMC to expedite the
+request.
+.It Li QHL_FRC_ACK_CNFLTS.LOCAL
+.Pq Event 33H , Umask 04H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the local home.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
+.Pq Event 40H , Umask 01H
+Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
+.Pq Event 40H , Umask 02H
+Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
+.Pq Event 40H , Umask 04H
+Counts cycles the Quickpath outbound link 0 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
+.Pq Event 40H , Umask 08H
+Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
+.Pq Event 40H , Umask 10H
+Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
+.Pq Event 40H , Umask 20H
+Counts cycles the Quickpath outbound link 1 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
+.Pq Event 40H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
+.Pq Event 40H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
+.Pq Event 41H , Umask 01H
+Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
+.Pq Event 41H , Umask 02H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
+.Pq Event 41H , Umask 04H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
+.Pq Event 41H , Umask 08H
+Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
+.Pq Event 41H , Umask 10H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
+.Pq Event 41H , Umask 20H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
+.Pq Event 41H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
+.Pq Event 41H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_HEADER.BUSY.LINK_0
+.Pq Event 42H , Umask 02H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is busy.
+.It Li QPI_TX_HEADER.BUSY.LINK_1
+.Pq Event 42H , Umask 08H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is busy.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
+.Pq Event 43H , Umask 01H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
+.Pq Event 43H , Umask 02H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li DRAM_OPEN.CH0
+.Pq Event 60H , Umask 01H
+Counts number of DRAM Channel 0 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH1
+.Pq Event 60H , Umask 02H
+Counts number of DRAM Channel 1 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH2
+.Pq Event 60H , Umask 04H
+Counts number of DRAM Channel 2 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_PAGE_CLOSE.CH0
+.Pq Event 61H , Umask 01H
+DRAM channel 0 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH1
+.Pq Event 61H , Umask 02H
+DRAM channel 1 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH2
+.Pq Event 61H , Umask 04H
+DRAM channel 2 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH0
+.Pq Event 62H , Umask 01H
+Counts the number of precharges (PRE) that were issued to DRAM channel 0
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH1
+.Pq Event 62H , Umask 02H
+Counts the number of precharges (PRE) that were issued to DRAM channel 1
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH2
+.Pq Event 62H , Umask 04H
+Counts the number of precharges (PRE) that were issued to DRAM channel 2
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_READ_CAS.CH0
+.Pq Event 63H , Umask 01H
+Counts the number of times a read CAS command was issued on DRAM channel 0.
+.It Li DRAM_READ_CAS.AUTOPRE_CH0
+.Pq Event 63H , Umask 02H
+Counts the number of times a read CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH1
+.Pq Event 63H , Umask 04H
+Counts the number of times a read CAS command was issued on DRAM channel 1.
+.It Li DRAM_READ_CAS.AUTOPRE_CH1
+.Pq Event 63H , Umask 08H
+Counts the number of times a read CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH2
+.Pq Event 63H , Umask 10H
+Counts the number of times a read CAS command was issued on DRAM channel 2.
+.It Li DRAM_READ_CAS.AUTOPRE_CH2
+.Pq Event 63H , Umask 20H
+Counts the number of times a read CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH0
+.Pq Event 64H , Umask 01H
+Counts the number of times a write CAS command was issued on DRAM channel 0.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
+.Pq Event 64H , Umask 02H
+Counts the number of times a write CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH1
+.Pq Event 64H , Umask 04H
+Counts the number of times a write CAS command was issued on DRAM channel 1.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
+.Pq Event 64H , Umask 08H
+Counts the number of times a write CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH2
+.Pq Event 64H , Umask 10H
+Counts the number of times a write CAS command was issued on DRAM channel 2.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
+.Pq Event 64H , Umask 20H
+Counts the number of times a write CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_REFRESH.CH0
+.Pq Event 65H , Umask 01H
+Counts number of DRAM channel 0 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH1
+.Pq Event 65H , Umask 02H
+Counts number of DRAM channel 1 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH2
+.Pq Event 65H , Umask 04H
+Counts number of DRAM channel 2 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_PRE_ALL.CH0
+.Pq Event 66H , Umask 01H
+Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH1
+.Pq Event 66H , Umask 02H
+Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH2
+.Pq Event 66H , Umask 04H
+Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .

Property changes on: stable/8/lib/libpmc/pmc.corei7uc.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/8/lib/libpmc/pmc.ucf.3
===================================================================
--- stable/8/lib/libpmc/pmc.ucf.3	(nonexistent)
+++ stable/8/lib/libpmc/pmc.ucf.3	(revision 206702)
@@ -0,0 +1,115 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 30, 2010
+.Os
+.Dt PMC.UCF 3
+.Sh NAME
+.Nm pmc.ucf
+.Nd measurement events for
+.Tn Intel
+uncore fixed function performance counters.
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Each fixed-function PMC measures a specific hardware event.
+The number of fixed-function PMCs implemented in a CPU can vary.
+The number of fixed-function PMCs present can be determined at runtime
+by using function
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel uncore fixed-function PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developes Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Pp
+.Ss PMC Capabilities
+Fixed-function PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta \&No
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Class Name Prefix
+These PMCs are named using a class name prefix of
+.Dq Li ucf- .
+.Ss Event Specifiers (Fixed Function PMCs)
+The fixed function PMCs are selectable using the following
+event names:
+.Bl -tag -width indent
+.It Li UCLOCK
+.Pq Fixed Function Counter 0
+The fixed-function uncore counter increments at the rate of the U-clock.
+The frequency of the uncore clock domain can be determined from the uncore
+clock ratio which is available in the PCI configuration space register at
+offset C0H under device number 0 and Function 0.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
+
+

Property changes on: stable/8/lib/libpmc/pmc.ucf.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/8/lib/libpmc/pmc.westmere.3
===================================================================
--- stable/8/lib/libpmc/pmc.westmere.3	(nonexistent)
+++ stable/8/lib/libpmc/pmc.westmere.3	(revision 206702)
@@ -0,0 +1,1329 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.WESTMERE 3
+.Sh NAME
+.Nm pmc.westmere
+.Nd measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developes Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss WESTMERE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and was serviced
+by forwarded data following a cross package snoop where no modified
+copies found. (Remote home requests are not counted)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Westmere programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li LOAD_BLOCK.OVERLAP_STORE
+.Pq Event 03H , Umask 02H
+Loads that partially overlap an earlier store
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+All Store buffer stall cycles
+.It Li MISALIGN_MEMORY.STORE
+.Pq Event 05H , Umask 02H
+All store referenced with misaligned address
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.WALK_CYCLES
+.Pq Event 08H , Umask 04H
+Cycles PMH is busy with a page walk due to a load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not counter
+prefetches. Counts both primary and secondary misses to the TLB
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops issued by the Register Allocation Table
+to the Reservation Station, i.e. the UOPs issued from the front end to the
+back end.
+set invert=1, cmask = 1
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.LOCAL_HITM
+.Pq Event 0FH , Umask 02H
+Load instructions retired that HIT modified data in sibling core (Precise
+Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT
+.Pq Event 0FH , Umask 08H
+Load instructions retired local dram and remote cache HIT data sources
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 10H
+Load instructions retired with a data source of local DRAM or locally homed
+remote cache HITM (Precise Event)
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 20H
+Load instructions retired remote DRAM and remote home-remote cache HITM
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Load instructions retired I/O (Precise Event)
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge =1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect When SMT is on
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect When SMT is on
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed. In such case,
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Number of loops that can not stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches..
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e, a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch,.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests.The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 02H
+Counts uncore Last Level Cache references. Because cache hierarchy, cache
+sizes and other implementation-specific characteristics; value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 01H
+Counts uncore Last Level Cache misses. Because cache hierarchy, cache sizes
+and other implementation-specific characteristics; value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which causes a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.WALK_CYCLES
+.Pq Event 49H , Umask 04H
+Counts cycles of page walk due to misses in the STLB.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D. A streamer, which predicts lines sequentially after
+this one should be fetched, and the IP prefetcher that remembers access
+patterns for the current instruction. The streamer prefetcher stops on an
+L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS
+.Pq Event 4EH , Umask 04H
+Counts number of prefetch requests triggered by the Finite State Machine and
+pushed into the prefetch FIFO. Some of the prefetch requests are dropped due
+to overwrites or competition between the IP index prefetcher and streamer
+prefetcher. The prefetch FIFO contains 4 entries.
+.It Li EPT.WALK_CYCLES
+.Pq Event 4FH , Umask 10H
+Counts Extended Page walk cycles.
+.It Li L1D.REPL
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_REPL
+.Pq Event 51H , Umask 02H
+Counts the number of modified lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_EVICT
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to
+replacement.
+Counter 0, 1 only
+.It Li L1D.M_SNOOP_EVICT
+.Pq Event 51H , Umask 08H
+Counts the number of modified lines evicted from the L1 data cache due to
+snoop HITM intervention.
+Counter 0, 1 only
+.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT
+.Pq Event 52H , Umask 01H
+Counts the number of cacheable load lock speculated instructions accepted
+into the fill buffer.
+.It Li L1D_CACHE_LOCK_FB_HIT
+.Pq Event 53H , Umask 01H
+Counts the number of cacheable load lock speculated or retired instructions
+accepted into the fill buffer.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Include L2
+prefetch requests.
+counter 0
+.It Li CACHE_LOCK_CYCLES.L1D_L2
+.Pq Event 63H , Umask 01H
+Cycle count during which the L1D and L2 are locked. A lock is asserted when
+there is a locked memory access, due to uncacheable memory, a locked
+operation that spans two cache lines, or a page walk from an uncacheable
+page table.
+Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and
+it is highly recommended to avoid such accesses.
+.It Li CACHE_LOCK_CYCLES.L1D
+.Pq Event 63H , Umask 02H
+Counts the number of cycles that cacheline in the L1 data cache unit is
+locked.
+Counter 0, 1 only.
+.It Li IO_TRANSACTIONS
+.Pq Event 6CH , Umask 01H
+Counts the number of completed I/O transactions.
+.It Li L1I.HITS
+.Pq Event 80H , Umask 01H
+Counts all instruction fetches that hit the L1 instruction cache.
+.It Li L1I.MISSES
+.Pq Event 80H , Umask 02H
+Counts all instruction fetches that miss the L1I cache. This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to a L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which causes a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Brand Prediction Unit (PBU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a rear return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed, (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls exeucted, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed and stalls arising while store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire.
+When RS is full, new instructions can not enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline, (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re- order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring to close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediciton direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by loop stream detector
+Use cmask=1 and invert to count cycles
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event 80H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that where issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts number of cycles there are one or more uops being executed and were
+issued on ports 0-4. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that where issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts number of cycles there are one or more uops being executed on any
+ports. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that where issued on port 0, 1, or 5.
+use cmask=1, invert=1 to count stall cycles
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that where issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS.CODE
+.Pq Event B4H , Umask 01H
+Counts the number of snoop code requests
+.It Li SNOOPQ_REQUESTS.DATA
+.Pq Event B4H , Umask 02H
+Counts the number of snoop data requests
+.It Li SNOOPQ_REQUESTS.INVALIDATE
+.Pq Event B4H , Umask 04H
+Counts the number of snoop invalidate requests
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A6H
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Use MSR 01A7H
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of retired: MMX instructions.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired, (macro-fused=1, micro- fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a sever penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H , Umask 01H
+Counts mispredicted conditional retired calls.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 04H
+Counts all mispredicted retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD calar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double- precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean or modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded, (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.STALL_CYCLES
+.Pq Event D1H , Umask 01H
+Counts the cycles of decoder stalls.
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push , pop , call
+, ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: Cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe. Cycles when partial register stalls occurred Cycles when
+flag stalls occurred Cycles floating-point unit (FPU) status word stalls
+occurred. To count each of these conditions separately use the events:
+RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front- end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediciton Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline. The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The PBU clear leads to a 3 cycle bubble in the Front End.
+.It Li THREAD_ACTIVE
+.Pq Event ECH , Umask 01H
+Counts cycles threads are active.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions, (Denormal input when the DAZ flag is off or Underflow
+result when the FTZ flag is off): x87 instructions, (NaN or denormal are
+loaded to a register or used as input from memory, Division by 0 or
+Underflow output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assist when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assist when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SID integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SID integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SID integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SID integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SID integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SID integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SID integer 64 bit shift or move operations.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .

Property changes on: stable/8/lib/libpmc/pmc.westmere.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/8/lib/libpmc/pmc.westmereuc.3
===================================================================
--- stable/8/lib/libpmc/pmc.westmereuc.3	(nonexistent)
+++ stable/8/lib/libpmc/pmc.westmereuc.3	(revision 206702)
@@ -0,0 +1,1083 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.WESTMEREUC 3
+.Sh NAME
+.Nm pmc.westmere
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developes Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss WESTMERE UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Westmere uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles were Global Queue read tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles were Global Queue write tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles were Global Queue peer probe tracker has at least one valid
+entry. The peer probe tracker queue tracks IOH and remote socket snoops.
+.It Li GQ_OCCUPANCY.READ_TRACKER
+.Pq Event 02H , Umask 01H
+Increments the number of queue entries (code read, data read, and RFOs) in
+the tread tracker. The GQ read tracker allocate to deallocate occupancy
+count is divided by the count to obtain the average read tracker latency.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of tread tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by the count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number GQ read tracker entries for which a full cache line read
+has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
+divided by this count to obtain the average cache line read L3 miss latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID.	The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID.	The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by the this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when the line was forwarded in M state
+is forwarded due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li GQ_SNOOP.GOTO_S
+.Pq Event 0CH , Umask 01H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state.
+.It Li GQ_SNOOP.GOTO_I
+.Pq Event 0CH , Umask 02H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state.
+.It Li GQ_SNOOP.GOTO_S_HIT_E
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from E state.
+Requires writing MSR 301H with mask = 2H
+.It Li GQ_SNOOP.GOTO_S_HIT_F
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from F (forward) state.
+Requires writing MSR 301H with mask = 8H
+.It Li GQ_SNOOP.GOTO_S_HIT_M
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from M state.
+Requires writing MSR 301H with mask = 1H
+.It Li GQ_SNOOP.GOTO_S_HIT_S
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from S state.
+Requires writing MSR 301H with mask = 4H
+.It Li GQ_SNOOP.GOTO_I_HIT_E
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from E state.
+Requires writing MSR 301H with mask = 2H
+.It Li GQ_SNOOP.GOTO_I_HIT_F
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from F (forward) state.
+Requires writing MSR 301H with mask = 8H
+.It Li GQ_SNOOP.GOTO_I_HIT_M
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from M state.
+Requires writing MSR 301H with mask = 1H
+.It Li GQ_SNOOP.GOTO_I_HIT_S
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from S state.
+Requires writing MSR 301H with mask = 4H
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy.
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is
+busy.
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker is
+busy.
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number or requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1
+.Pq Event 29H , Umask 02H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 1.
+.It Li QMC_BUSY.READ.CH2
+.Pq Event 29H , Umask 04H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 2.
+.It Li QMC_BUSY.WRITE.CH0
+.Pq Event 29H , Umask 08H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 0.
+.It Li QMC_BUSY.WRITE.CH1
+.Pq Event 29H , Umask 10H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 1.
+.It Li QMC_BUSY.WRITE.CH2
+.Pq Event 29H , Umask 20H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 2.
+.It Li QMC_OCCUPANCY.CH0
+.Pq Event 2AH , Umask 01H
+IMC channel 0 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH1
+.Pq Event 2AH , Umask 02H
+IMC channel 1 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH2
+.Pq Event 2AH , Umask 04H
+IMC channel 2 normal read request occupancy.
+.It Li QMC_OCCUPANCY.ANY
+.Pq Event 2AH , Umask 07H
+Normal read request occupancy for any channel.
+.It Li QMC_ISSOC_OCCUPANCY.CH0
+.Pq Event 2BH , Umask 01H
+IMC channel 0 issoc read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH1
+.Pq Event 2BH , Umask 02H
+IMC channel 1 issoc read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH2
+.Pq Event 2BH , Umask 04H
+IMC channel 2 issoc read request occupancy.
+.It Li QMC_ISSOC_READS.ANY
+.Pq Event 2BH , Umask 07H
+IMC issoc read request occupancy.
+.It Li QMC_NORMAL_READS.CH0
+.Pq Event 2CH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 medium and low
+priority read requests. The QMC channel 0 normal read occupancy divided by
+this count provides the average QMC channel 0 read latency.
+.It Li QMC_NORMAL_READS.CH1
+.Pq Event 2CH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 medium and low
+priority read requests. The QMC channel 1 normal read occupancy divided by
+this count provides the average QMC channel 1 read latency.
+.It Li QMC_NORMAL_READS.CH2
+.Pq Event 2CH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 medium and low
+priority read requests. The QMC channel 2 normal read occupancy divided by
+this count provides the average QMC channel 2 read latency.
+.It Li QMC_NORMAL_READS.ANY
+.Pq Event 2CH , Umask 07H
+Counts the number of Quickpath Memory Controller medium and low priority
+read requests. The QMC normal read occupancy divided by this count provides
+the average QMC read latency.
+.It Li QMC_HIGH_PRIORITY_READS.CH0
+.Pq Event 2DH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH1
+.Pq Event 2DH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH2
+.Pq Event 2DH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.ANY
+.Pq Event 2DH , Umask 07H
+Counts the number of Quickpath Memory Controller high priority isochronous
+read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH0
+.Pq Event 2EH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH1
+.Pq Event 2EH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2
+.Pq Event 2EH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.ANY
+.Pq Event 2EH , Umask 07H
+Counts the number of Quickpath Memory Controller critical priority
+isochronous read requests.
+.It Li QMC_WRITES.FULL.CH0
+.Pq Event 2FH , Umask 01H
+Counts number of full cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.FULL.CH1
+.Pq Event 2FH , Umask 02H
+Counts number of full cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.FULL.CH2
+.Pq Event 2FH , Umask 04H
+Counts number of full cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.FULL.ANY
+.Pq Event 2FH , Umask 07H
+Counts number of full cache line writes to DRAM.
+.It Li QMC_WRITES.PARTIAL.CH0
+.Pq Event 2FH , Umask 08H
+Counts number of partial cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.PARTIAL.CH1
+.Pq Event 2FH , Umask 10H
+Counts number of partial cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.PARTIAL.CH2
+.Pq Event 2FH , Umask 20H
+Counts number of partial cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.PARTIAL.ANY
+.Pq Event 2FH , Umask 38H
+Counts number of partial cache line writes to DRAM.
+.It Li QMC_CANCEL.CH0
+.Pq Event 30H , Umask 01H
+Counts number of DRAM channel 0 cancel requests.
+.It Li QMC_CANCEL.CH1
+.Pq Event 30H , Umask 02H
+Counts number of DRAM channel 1 cancel requests.
+.It Li QMC_CANCEL.CH2
+.Pq Event 30H , Umask 04H
+Counts number of DRAM channel 2 cancel requests.
+.It Li QMC_CANCEL.ANY
+.Pq Event 30H , Umask 07H
+Counts number of DRAM cancel requests.
+.It Li QMC_PRIORITY_UPDATES.CH0
+.Pq Event 31H , Umask 01H
+Counts number of DRAM channel 0 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH1
+.Pq Event 31H , Umask 02H
+Counts number of DRAM channel 1 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH2
+.Pq Event 31H , Umask 04H
+Counts number of DRAM channel 2 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.ANY
+.Pq Event 31H , Umask 07H
+Counts number of DRAM priority updates. A priority update occurs when an
+ISOC high or critical request is received by the QHL and there is a matching
+request with normal priority that has already been issued to the QMC. In
+this instance, the QHL will send a priority update to QMC to expedite the
+request.
+.It Li IMC_RETRY.CH0
+.Pq Event 32H , Umask 01H
+Counts number of IMC DRAM channel 0 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.CH1
+.Pq Event 32H , Umask 02H
+Counts number of IMC DRAM channel 1 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.CH2
+.Pq Event 32H , Umask 04H
+Counts number of IMC DRAM channel 2 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.ANY
+.Pq Event 32H , Umask 07H
+Counts number of IMC DRAM retries from any channel. DRAM retry only occurs
+when configured in RAS mode.
+.It Li QHL_FRC_ACK_CNFLTS.IOH
+.Pq Event 33H , Umask 01H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the IOH.
+.It Li QHL_FRC_ACK_CNFLTS.REMOTE
+.Pq Event 33H , Umask 02H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the remote home.
+.It Li QHL_FRC_ACK_CNFLTS.LOCAL
+.Pq Event 33H , Umask 04H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the local home.
+.It Li QHL_FRC_ACK_CNFLTS.ANY
+.Pq Event 33H , Umask 07H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic.
+.It Li QHL_SLEEPS.IOH_ORDER
+.Pq Event 34H , Umask 01H
+Counts number of occurrences a request was put to sleep due to IOH ordering
+(write after read) conflicts. While in the sleep state, the request is not
+eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.REMOTE_ORDER
+.Pq Event 34H , Umask 02H
+Counts number of occurrences a request was put to sleep due to remote socket
+ordering (write after read) conflicts. While in the sleep state, the request
+is not eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.LOCAL_ORDER
+.Pq Event 34H , Umask 04H
+Counts number of occurrences a request was put to sleep due to local socket
+ordering (write after read) conflicts. While in the sleep state, the request
+is not eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.IOH_CONFLICT
+.Pq Event 34H , Umask 08H
+Counts number of occurrences a request was put to sleep due to IOH address
+conflicts. While in the sleep state, the request is not eligible to be
+scheduled to the QMC.
+.It Li QHL_SLEEPS.REMOTE_CONFLICT
+.Pq Event 34H , Umask 10H
+Counts number of occurrences a request was put to sleep due to remote socket
+address conflicts. While in the sleep state, the request is not eligible to
+be scheduled to the QMC.
+.It Li QHL_SLEEPS.LOCAL_CONFLICT
+.Pq Event 34H , Umask 20H
+Counts number of occurrences a request was put to sleep due to local socket
+address conflicts. While in the sleep state, the request is not eligible to
+be scheduled to the QMC.
+.It Li ADDR_OPCODE_MATCH.IOH
+.Pq Event 35H , Umask 01H
+Counts number of requests from the IOH, address/opcode of request is
+qualified by mask value written to MSR 396H. The following mask values are
+supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/addres s by writing MSR 396H with mask supported mask value
+.It Li ADDR_OPCODE_MATCH.REMOTE
+.Pq Event 35H , Umask 02H
+Counts number of requests from the remote socket, address/opcode of request
+is qualified by mask value written to MSR 396H. The following mask values
+are supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/addres s by writing MSR 396H with mask supported mask value
+.It Li ADDR_OPCODE_MATCH.LOCAL
+.Pq Event 35H , Umask 04H
+Counts number of requests from the local socket, address/opcode of request
+is qualified by mask value written to MSR 396H. The following mask values
+are supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/addres s by writing MSR 396H with mask supported mask value
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
+.Pq Event 40H , Umask 01H
+Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
+.Pq Event 40H , Umask 02H
+Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
+.Pq Event 40H , Umask 04H
+Counts cycles the Quickpath outbound link 0 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
+.Pq Event 40H , Umask 08H
+Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
+.Pq Event 40H , Umask 10H
+Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
+.Pq Event 40H , Umask 20H
+Counts cycles the Quickpath outbound link 1 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
+.Pq Event 40H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
+.Pq Event 40H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
+.Pq Event 41H , Umask 01H
+Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
+.Pq Event 41H , Umask 02H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
+.Pq Event 41H , Umask 04H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
+.Pq Event 41H , Umask 08H
+Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
+.Pq Event 41H , Umask 10H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
+.Pq Event 41H , Umask 20H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
+.Pq Event 41H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
+.Pq Event 41H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_HEADER.FULL.LINK_0
+.Pq Event 42H , Umask 01H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is full.
+.It Li QPI_TX_HEADER.BUSY.LINK_0
+.Pq Event 42H , Umask 02H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is busy.
+.It Li QPI_TX_HEADER.FULL.LINK_1
+.Pq Event 42H , Umask 04H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is full.
+.It Li QPI_TX_HEADER.BUSY.LINK_1
+.Pq Event 42H , Umask 08H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is busy.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
+.Pq Event 43H , Umask 01H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
+.Pq Event 43H , Umask 02H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li DRAM_OPEN.CH0
+.Pq Event 60H , Umask 01H
+Counts number of DRAM Channel 0 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH1
+.Pq Event 60H , Umask 02H
+Counts number of DRAM Channel 1 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH2
+.Pq Event 60H , Umask 04H
+Counts number of DRAM Channel 2 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_PAGE_CLOSE.CH0
+.Pq Event 61H , Umask 01H
+DRAM channel 0 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH1
+.Pq Event 61H , Umask 02H
+DRAM channel 1 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH2
+.Pq Event 61H , Umask 04H
+DRAM channel 2 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH0
+.Pq Event 62H , Umask 01H
+Counts the number of precharges (PRE) that were issued to DRAM channel 0
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH1
+.Pq Event 62H , Umask 02H
+Counts the number of precharges (PRE) that were issued to DRAM channel 1
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH2
+.Pq Event 62H , Umask 04H
+Counts the number of precharges (PRE) that were issued to DRAM channel 2
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_READ_CAS.CH0
+.Pq Event 63H , Umask 01H
+Counts the number of times a read CAS command was issued on DRAM channel 0.
+.It Li DRAM_READ_CAS.AUTOPRE_CH0
+.Pq Event 63H , Umask 02H
+Counts the number of times a read CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH1
+.Pq Event 63H , Umask 04H
+Counts the number of times a read CAS command was issued on DRAM channel 1.
+.It Li DRAM_READ_CAS.AUTOPRE_CH1
+.Pq Event 63H , Umask 08H
+Counts the number of times a read CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH2
+.Pq Event 63H , Umask 10H
+Counts the number of times a read CAS command was issued on DRAM channel 2.
+.It Li DRAM_READ_CAS.AUTOPRE_CH2
+.Pq Event 63H , Umask 20H
+Counts the number of times a read CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH0
+.Pq Event 64H , Umask 01H
+Counts the number of times a write CAS command was issued on DRAM channel 0.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
+.Pq Event 64H , Umask 02H
+Counts the number of times a write CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH1
+.Pq Event 64H , Umask 04H
+Counts the number of times a write CAS command was issued on DRAM channel 1.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
+.Pq Event 64H , Umask 08H
+Counts the number of times a write CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH2
+.Pq Event 64H , Umask 10H
+Counts the number of times a write CAS command was issued on DRAM channel 2.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
+.Pq Event 64H , Umask 20H
+Counts the number of times a write CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_REFRESH.CH0
+.Pq Event 65H , Umask 01H
+Counts number of DRAM channel 0 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH1
+.Pq Event 65H , Umask 02H
+Counts number of DRAM channel 1 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH2
+.Pq Event 65H , Umask 04H
+Counts number of DRAM channel 2 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_PRE_ALL.CH0
+.Pq Event 66H , Umask 01H
+Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH1
+.Pq Event 66H , Umask 02H
+Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH2
+.Pq Event 66H , Umask 04H
+Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_THERMAL_THROTTLED
+.Pq Event 67H , Umask 01H
+Uncore cycles DRAM was throttled due to its temperature being above the
+thermal throttling threshold.
+.It Li THERMAL_THROTTLING_TEMP.CORE_0
+.Pq Event 80H , Umask 01H
+Cycles that the PCU records that core 0 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_1
+.Pq Event 80H , Umask 02H
+Cycles that the PCU records that core 1 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_2
+.Pq Event 80H , Umask 04H
+Cycles that the PCU records that core 2 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_3
+.Pq Event 80H , Umask 08H
+Cycles that the PCU records that core 3 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLED_TEMP.CORE_0
+.Pq Event 81H , Umask 01H
+Cycles that the PCU records that core 0 is in the power throttled state due
+to cores temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_1
+.Pq Event 81H , Umask 02H
+Cycles that the PCU records that core 1 is in the power throttled state due
+to cores temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_2
+.Pq Event 81H , Umask 04H
+Cycles that the PCU records that core 2 is in the power throttled state due
+to cores temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_3
+.Pq Event 81H , Umask 08H
+Cycles that the PCU records that core 3 is in the power throttled state due
+to cores temperature being above the thermal throttling threshold.
+.It Li PROCHOT_ASSERTION
+.Pq Event 82H , Umask 01H
+Number of system assertions of PROCHOT indicating the entire processor has
+exceeded the thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_0
+.Pq Event 83H , Umask 01H
+Cycles that the PCU records that core 0 is a low power state due to the
+system asserting PROCHOT the entire processor has exceeded the thermal
+limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_1
+.Pq Event 83H , Umask 02H
+Cycles that the PCU records that core 1 is a low power state due to the
+system asserting PROCHOT the entire processor has exceeded the thermal
+limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_2
+.Pq Event 83H , Umask 04H
+Cycles that the PCU records that core 2 is a low power state due to the
+system asserting PROCHOT the entire processor has exceeded the thermal
+limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_3
+.Pq Event 83H , Umask 08H
+Cycles that the PCU records that core 3 is a low power state due to the
+system asserting PROCHOT the entire processor has exceeded the thermal
+limit.
+.It Li TURBO_MODE.CORE_0
+.Pq Event 84H , Umask 01H
+Uncore cycles that core 0 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_1
+.Pq Event 84H , Umask 02H
+Uncore cycles that core 1 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_2
+.Pq Event 84H , Umask 04H
+Uncore cycles that core 2 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_3
+.Pq Event 84H , Umask 08H
+Uncore cycles that core 3 is operating in turbo mode.
+.It Li CYCLES_UNHALTED_L3_FLL_ENABLE
+.Pq Event 85H , Umask 02H
+Uncore cycles that at least one core is unhalted and all L3 ways are
+enabled.
+.It Li CYCLES_UNHALTED_L3_FLL_DISABLE
+.Pq Event 86H , Umask 01H
+Uncore cycles that at least one core is unhalted and all L3 ways are
+disabled.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .

Property changes on: stable/8/lib/libpmc/pmc.westmereuc.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/8/lib/libpmc
===================================================================
--- stable/8/lib/libpmc	(revision 206701)
+++ stable/8/lib/libpmc	(revision 206702)

Property changes on: stable/8/lib/libpmc
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libpmc:r206089