Index: lib/Makefile
===================================================================
--- lib/Makefile
+++ lib/Makefile
@@ -175,7 +175,12 @@
 _libvgl=	libvgl
 .endif
 
+.if ${MACHINE_CPUARCH} == "aarch64"
+SUBDIR.${MK_PMC}+=	libopencsd
+.endif
+
 .if ${MACHINE_CPUARCH} == "amd64"
+SUBDIR.${MK_PMC}+=	libipt
 SUBDIR.${MK_BHYVE}+=	libvmmapi
 .endif
 
Index: lib/libipt/Makefile
===================================================================
--- /dev/null
+++ lib/libipt/Makefile
@@ -0,0 +1,77 @@
+# $FreeBSD$
+
+PACKAGE=lib${LIB}
+SHLIBDIR?=	/lib
+
+.include <src.opts.mk>
+
+LIBIPTSRC=	${SRCTOP}/contrib/libipt
+
+.PATH:	${LIBIPTSRC}/libipt/src			\
+	${LIBIPTSRC}/libipt/src/posix		\
+	${LIBIPTSRC}/libipt/internal/include	\
+	${LIBIPTSRC}/libipt/include		\
+	${LIBIPTSRC}/include
+
+LIB=	ipt
+SHLIB_MAJOR=0
+
+SRCS=	\
+	init.c				\
+	pt_asid.c			\
+	pt_block_cache.c		\
+	pt_block_decoder.c		\
+	pt_config.c			\
+	pt_cpu.c			\
+	pt_cpuid.c			\
+	pt_decoder_function.c		\
+	pt_encoder.c			\
+	pt_error.c			\
+	pt_event_queue.c		\
+	pt_ild.c			\
+	pt_image_section_cache.c	\
+	pt_image.c			\
+	pt_insn_decoder.c		\
+	pt_insn.c			\
+	pt_last_ip.c			\
+	pt_packet_decoder.c		\
+	pt_packet.c			\
+	pt_query_decoder.c		\
+	pt_retstack.c			\
+	pt_section_file.c		\
+	pt_section_posix.c		\
+	pt_section.c			\
+	pt_sync.c			\
+	pt_time.c			\
+	pt_tnt_cache.c			\
+	pt_version.c
+
+CFLAGS+=	\
+	-I${LIBIPTSRC}/libipt/internal/include/posix	\
+	-I${LIBIPTSRC}/libipt/internal/include		\
+	-I${LIBIPTSRC}/libipt/include			\
+	-I${LIBIPTSRC}/include				\
+	-I${.CURDIR}
+
+CFLAGS+=	\
+	-DPT_VERSION_BUILD=0	\
+	-DPT_VERSION_EXT=\"\"	\
+	-DPT_VERSION_MAJOR=1	\
+	-DPT_VERSION_MINOR=6
+
+INCS=	\
+	intel-pt.h	\
+	pt_cpu.h	\
+	pt_last_ip.h	\
+	pt_time.h	\
+	pt_compiler.h
+
+INCSDIR=${INCLUDEDIR}/libipt
+
+LIBADD=	
+
+WARNS?= 1
+
+HAS_TESTS=
+
+.include <bsd.lib.mk>
Index: lib/libopencsd/Makefile
===================================================================
--- /dev/null
+++ lib/libopencsd/Makefile
@@ -0,0 +1,180 @@
+# $FreeBSD$
+
+PACKAGE=lib${LIB}
+SHLIBDIR?=	/lib
+
+.include <src.opts.mk>
+
+OPENCSDSRC=	${SRCTOP}/contrib/opencsd
+
+.PATH:	${OPENCSDSRC}/decoder/source/etmv4/		\
+	${OPENCSDSRC}/decoder/source/etmv3/		\
+	${OPENCSDSRC}/decoder/source/pkt_printers/	\
+	${OPENCSDSRC}/decoder/source/mem_acc/		\
+	${OPENCSDSRC}/decoder/source/i_dec/		\
+	${OPENCSDSRC}/decoder/source/c_api/		\
+	${OPENCSDSRC}/decoder/source/ptm/		\
+	${OPENCSDSRC}/decoder/source/stm/		\
+	${OPENCSDSRC}/decoder/source/			\
+	${OPENCSDSRC}/decoder/include/opencsd/etmv4/	\
+	${OPENCSDSRC}/decoder/include/opencsd/etmv3/	\
+	${OPENCSDSRC}/decoder/include/opencsd/stm/	\
+	${OPENCSDSRC}/decoder/include/opencsd/ptm/	\
+	${OPENCSDSRC}/decoder/include/opencsd/c_api/	\
+	${OPENCSDSRC}/decoder/include/opencsd/		\
+	${OPENCSDSRC}/decoder/include
+
+LIB=	opencsd
+SHLIB_MAJOR=0
+
+# Uncomment for debugging
+#CFLAGS += -g -O0 -DDEBUG
+#CPPFLAGS += -g -O0 -DDEBUG
+#CXXFLAGS += -g -O0 -DDEBUG
+
+# ETMv3
+SRCS=	\
+	trc_cmp_cfg_etmv3.cpp		\
+	trc_pkt_decode_etmv3.cpp	\
+	trc_pkt_elem_etmv3.cpp		\
+	trc_pkt_proc_etmv3.cpp		\
+	trc_pkt_proc_etmv3_impl.cpp
+
+# ETMv4
+SRCS+=	\
+	trc_cmp_cfg_etmv4.cpp			\
+	trc_etmv4_stack_elem.cpp		\
+	trc_pkt_decode_etmv4i.cpp		\
+	trc_pkt_elem_etmv4d.cpp			\
+	trc_pkt_elem_etmv4i.cpp			\
+	trc_pkt_proc_etmv4.cpp			\
+	trc_pkt_proc_etmv4i_impl.cpp
+
+# PKT_PRINTERS
+SRCS+=	\
+	raw_frame_printer.cpp			\
+	trc_print_fact.cpp
+
+# PTM
+SRCS+=	\
+	trc_cmp_cfg_ptm.cpp			\
+	trc_pkt_decode_ptm.cpp			\
+	trc_pkt_elem_ptm.cpp			\
+	trc_pkt_proc_ptm.cpp
+
+# STM
+SRCS+=	\
+	trc_pkt_decode_stm.cpp			\
+	trc_pkt_elem_stm.cpp			\
+	trc_pkt_proc_stm.cpp
+
+# C_API
+SRCS+=	\
+	ocsd_c_api_custom_obj.cpp		\
+	ocsd_c_api.cpp
+
+# SRC
+SRCS+=	\
+	ocsd_code_follower.cpp			\
+	ocsd_dcd_tree.cpp			\
+	ocsd_error.cpp				\
+	ocsd_error_logger.cpp			\
+	ocsd_gen_elem_list.cpp			\
+	ocsd_lib_dcd_register.cpp		\
+	ocsd_msg_logger.cpp			\
+	ocsd_version.cpp			\
+	trc_component.cpp			\
+	trc_core_arch_map.cpp			\
+	trc_frame_deformatter.cpp		\
+	trc_gen_elem.cpp			\
+	trc_printable_elem.cpp			\
+	trc_ret_stack.cpp
+
+# MEM_ACC
+SRCS+=	\
+	trc_mem_acc_base.cpp			\
+	trc_mem_acc_cb.cpp			\
+	trc_mem_acc_mapper.cpp			\
+	trc_mem_acc_bufptr.cpp			\
+	trc_mem_acc_file.cpp
+
+# I_DEC
+SRCS+=	\
+	trc_i_decode.cpp			\
+	trc_idec_arminst.cpp
+
+CFLAGS+=	\
+	-I${OPENCSDSRC}/decoder/include/	\
+	-I${.CURDIR}
+
+INCS=	\
+	ocsd_if_types.h			\
+	trc_gen_elem_types.h		\
+	trc_pkt_types.h
+
+INCSDIR=${INCLUDEDIR}/opencsd
+
+APIINCS=	\
+	ocsd_c_api_cust_fact.h		\
+	ocsd_c_api_cust_impl.h		\
+	ocsd_c_api_custom.h		\
+	ocsd_c_api_types.h		\
+	opencsd_c_api.h
+
+APIINCSDIR=${INCLUDEDIR}/opencsd/c_api/
+
+ETMV4INCS=	\
+	etmv4_decoder.h			\
+	trc_cmp_cfg_etmv4.h		\
+	trc_dcd_mngr_etmv4i.h		\
+	trc_etmv4_stack_elem.h		\
+	trc_pkt_decode_etmv4i.h		\
+	trc_pkt_elem_etmv4d.h		\
+	trc_pkt_elem_etmv4i.h		\
+	trc_pkt_proc_etmv4.h		\
+	trc_pkt_types_etmv4.h
+
+ETMV4INCSDIR=${INCLUDEDIR}/opencsd/etmv4/
+
+ETMV3INCS=	\
+	etmv3_decoder.h			\
+	trc_cmp_cfg_etmv3.h		\
+	trc_dcd_mngr_etmv3.h		\
+	trc_pkt_decode_etmv3.h		\
+	trc_pkt_elem_etmv3.h		\
+	trc_pkt_proc_etmv3.h		\
+	trc_pkt_types_etmv3.h
+
+ETMV3INCSDIR=${INCLUDEDIR}/opencsd/etmv3/
+
+PTMINCS=	\
+	ptm_decoder.h			\
+	trc_cmp_cfg_ptm.h		\
+	trc_dcd_mngr_ptm.h		\
+	trc_pkt_decode_ptm.h		\
+	trc_pkt_elem_ptm.h		\
+	trc_pkt_proc_ptm.h		\
+	trc_pkt_types_ptm.h
+
+PTMINCSDIR=${INCLUDEDIR}/opencsd/ptm/
+
+STMINCS=	\
+	stm_decoder.h			\
+	trc_cmp_cfg_stm.h		\
+	trc_dcd_mngr_stm.h		\
+	trc_pkt_decode_stm.h		\
+	trc_pkt_elem_stm.h		\
+	trc_pkt_proc_stm.h		\
+	trc_pkt_types_stm.h
+
+STMINCSDIR=${INCLUDEDIR}/opencsd/stm/
+
+INCSGROUPS=INCS APIINCS ETMV3INCS ETMV4INCS PTMINCS STMINCS
+
+LIBADD= cxxrt cplusplus
+
+WARNS?= 1
+
+HAS_TESTS=
+
+.include <bsd.lib.mk>
Index: lib/libpmc/libpmc.c
===================================================================
--- lib/libpmc/libpmc.c
+++ lib/libpmc/libpmc.c
@@ -76,6 +76,10 @@
 static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 #endif
+#if defined(__amd64__)
+static int pt_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+#endif
 #if defined(__arm__)
 #if defined(__XSCALE__)
 static int xscale_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
@@ -87,6 +91,8 @@
 #if defined(__aarch64__)
 static int arm64_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
+static int coresight_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
 #endif
 #if defined(__mips__)
 static int mips_allocate_pmc(enum pmc_event _pe, char* ctrspec,
@@ -239,6 +245,12 @@
 	__PMC_EV_ALIAS_SKYLAKE_XEON()
 };
 
+static const struct pmc_event_descr kabylake_event_table[] =
+{
+	/* Kabylake events are similar to Skylake */
+	__PMC_EV_ALIAS_SKYLAKE()
+};
+
 static const struct pmc_event_descr ivybridge_event_table[] =
 {
 	__PMC_EV_ALIAS_IVYBRIDGE()
@@ -336,6 +348,7 @@
 PMC_MDEP_TABLE(broadwell_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(skylake, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(skylake_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(kabylake, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP, PMC_CLASS_PT);
 PMC_MDEP_TABLE(ivybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(ivybridge_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(sandybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
@@ -350,8 +363,8 @@
 PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_SOFT, PMC_CLASS_XSCALE);
 PMC_MDEP_TABLE(cortex_a8, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7);
 PMC_MDEP_TABLE(cortex_a9, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7);
-PMC_MDEP_TABLE(cortex_a53, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8);
-PMC_MDEP_TABLE(cortex_a57, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8);
+PMC_MDEP_TABLE(cortex_a53, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8, PMC_CLASS_CORESIGHT);
+PMC_MDEP_TABLE(cortex_a57, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8, PMC_CLASS_CORESIGHT);
 PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_SOFT, PMC_CLASS_MIPS24K);
 PMC_MDEP_TABLE(mips74k, MIPS74K, PMC_CLASS_SOFT, PMC_CLASS_MIPS74K);
 PMC_MDEP_TABLE(octeon, OCTEON, PMC_CLASS_SOFT, PMC_CLASS_OCTEON);
@@ -365,6 +378,16 @@
 	__PMC_EV_TSC()
 };
 
+static const struct pmc_event_descr pt_event_table[] =
+{
+	__PMC_EV_PT()
+};
+
+static const struct pmc_event_descr coresight_event_table[] =
+{
+	__PMC_EV_CORESIGHT()
+};
+
 #undef	PMC_CLASS_TABLE_DESC
 #define	PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR)	\
 static const struct pmc_class_descr NAME##_class_table_descr =	\
@@ -392,6 +415,7 @@
 PMC_CLASS_TABLE_DESC(broadwell_xeon, IAP, broadwell_xeon, iap);
 PMC_CLASS_TABLE_DESC(skylake, IAP, skylake, iap);
 PMC_CLASS_TABLE_DESC(skylake_xeon, IAP, skylake_xeon, iap);
+PMC_CLASS_TABLE_DESC(kabylake, IAP, kabylake, iap);
 PMC_CLASS_TABLE_DESC(ivybridge, IAP, ivybridge, iap);
 PMC_CLASS_TABLE_DESC(ivybridge_xeon, IAP, ivybridge_xeon, iap);
 PMC_CLASS_TABLE_DESC(sandybridge, IAP, sandybridge, iap);
@@ -419,6 +443,9 @@
 #if	defined(__i386__) || defined(__amd64__)
 PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc);
 #endif
+#if	defined(__amd64__)
+PMC_CLASS_TABLE_DESC(pt, PT, pt, pt);
+#endif
 #if	defined(__arm__)
 #if	defined(__XSCALE__)
 PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale);
@@ -429,6 +456,7 @@
 #if	defined(__aarch64__)
 PMC_CLASS_TABLE_DESC(cortex_a53, ARMV8, cortex_a53, arm64);
 PMC_CLASS_TABLE_DESC(cortex_a57, ARMV8, cortex_a57, arm64);
+PMC_CLASS_TABLE_DESC(coresight, CORESIGHT, coresight, coresight);
 #endif
 #if defined(__mips__)
 PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips);
@@ -732,6 +760,8 @@
 #define skylake_aliases_without_iaf	core2_aliases_without_iaf
 #define skylake_xeon_aliases		core2_aliases
 #define skylake_xeon_aliases_without_iaf	core2_aliases_without_iaf
+#define kabylake_aliases		core2_aliases
+#define kabylake_aliases_without_iaf	core2_aliases_without_iaf
 #define ivybridge_aliases		core2_aliases
 #define ivybridge_aliases_without_iaf	core2_aliases_without_iaf
 #define ivybridge_xeon_aliases		core2_aliases
@@ -1049,7 +1079,8 @@
 				return (-1);
 
 		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_SKYLAKE ||
-		    cpu_info.pm_cputype == PMC_CPU_INTEL_SKYLAKE_XEON) {
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_SKYLAKE_XEON ||
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_KABYLAKE) {
 			if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
 				n = pmc_parse_mask(iap_rsp_mask_skylake, p, &rsp);
 			} else
@@ -2495,6 +2526,84 @@
 }
 #endif
 
+#if	defined(__amd64__)
+
+#define	INTEL_PT_KW_BRANCHES	"branches"
+#define	INTEL_PT_KW_TSC		"tsc"
+#define	INTEL_PT_KW_MTC		"mtc"
+#define	INTEL_PT_KW_DISRETC	"disretc"
+#define	INTEL_PT_KW_ADDRA	"addra"
+#define	INTEL_PT_KW_ADDRB	"addrb"
+
+static int
+pt_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	struct pmc_md_pt_op_pmcallocate *pm_pt;
+	uint64_t addr;
+	uint32_t addrn;
+	char *p, *q, *e;
+
+	if (pe != PMC_EV_PT_PT)
+		return (-1);
+
+	pm_pt = (struct pmc_md_pt_op_pmcallocate *)&pmc_config->pm_md.pm_pt;
+
+	addrn = 0;
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWMATCH(p, INTEL_PT_KW_BRANCHES)) {
+			pm_pt->flags |= INTEL_PT_FLAG_BRANCHES;
+		}
+
+		if (KWMATCH(p, INTEL_PT_KW_TSC)) {
+			pm_pt->flags |= INTEL_PT_FLAG_TSC;
+		}
+
+		if (KWMATCH(p, INTEL_PT_KW_MTC)) {
+			pm_pt->flags |= INTEL_PT_FLAG_MTC;
+		}
+
+		if (KWMATCH(p, INTEL_PT_KW_DISRETC)) {
+			pm_pt->flags |= INTEL_PT_FLAG_DISRETC;
+		}
+
+		if (KWPREFIXMATCH(p, INTEL_PT_KW_ADDRA "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			if (addrn >= PT_NADDR)
+				return (-1);
+			addr = strtoul(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pm_pt->ranges[addrn * 2] = addr;
+		}
+
+		if (KWPREFIXMATCH(p, INTEL_PT_KW_ADDRB "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			/* Bounds-check the slot before writing either endpoint. */
+			if (addrn >= PT_NADDR)
+				return (-1);
+			addr = strtoul(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pm_pt->ranges[addrn * 2 + 1] = addr;
+			if (addr < pm_pt->ranges[addrn * 2])
+				return (-1);
+			addrn += 1;
+		}
+	}
+
+	pm_pt->nranges = addrn;
+
+	pmc_config->pm_caps |= PMC_CAP_READ;
+
+	return (0);
+}
+#endif
+
 static struct pmc_event_alias generic_aliases[] = {
 	EV_ALIAS("instructions",		"SOFT-CLOCK.HARD"),
 	EV_ALIAS(NULL, NULL)
@@ -2583,6 +2692,61 @@
 
 	return (0);
 }
+
+#define	ARM_CORESIGHT_KW_ADDRA	"addra"
+#define	ARM_CORESIGHT_KW_ADDRB	"addrb"
+
+static int
+coresight_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	struct pmc_md_coresight_op_pmcallocate *pm_coresight;
+	uint64_t addr;
+	uint32_t addrn;
+	char *p, *q, *e;
+
+	if (pe != PMC_EV_CORESIGHT_CORESIGHT)
+		return (-1);
+
+	pm_coresight = (struct pmc_md_coresight_op_pmcallocate *)&pmc_config->pm_md.pm_coresight;
+
+	addrn = 0;
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWPREFIXMATCH(p, ARM_CORESIGHT_KW_ADDRA "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			if (addrn >= CORESIGHT_NADDR)
+				return (-1);
+			addr = strtoul(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pm_coresight->ranges[addrn * 2] = addr;
+		}
+
+		if (KWPREFIXMATCH(p, ARM_CORESIGHT_KW_ADDRB "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			/* Bounds-check the slot before writing either endpoint. */
+			if (addrn >= CORESIGHT_NADDR)
+				return (-1);
+			addr = strtoul(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pm_coresight->ranges[addrn * 2 + 1] = addr;
+			if (addr < pm_coresight->ranges[addrn * 2])
+				return (-1);
+			addrn += 1;
+		}
+	}
+
+	pm_coresight->nranges = addrn;
+
+	pmc_config->pm_caps |= PMC_CAP_READ;
+
+	return (0);
+}
 #endif
 
 #if defined(__mips__)
@@ -2780,7 +2944,8 @@
 	retval    = -1;
 
 	if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
-	    mode != PMC_MODE_SC && mode != PMC_MODE_TC) {
+	    mode != PMC_MODE_SC && mode != PMC_MODE_TC &&
+	    mode != PMC_MODE_ST && mode != PMC_MODE_TT) {
 		errno = EINVAL;
 		goto out;
 	}
@@ -2903,6 +3068,7 @@
 int
 pmc_cpuinfo(const struct pmc_cpuinfo **pci)
 {
+
 	if (pmc_syscall == -1) {
 		errno = ENXIO;
 		return (-1);
@@ -3023,6 +3189,10 @@
 			ev = skylake_xeon_event_table;
 			count = PMC_EVENT_TABLE_SIZE(skylake_xeon);
 			break;
+		case PMC_CPU_INTEL_KABYLAKE:
+			ev = kabylake_event_table;
+			count = PMC_EVENT_TABLE_SIZE(kabylake);
+			break;
 		case PMC_CPU_INTEL_IVYBRIDGE:
 			ev = ivybridge_event_table;
 			count = PMC_EVENT_TABLE_SIZE(ivybridge);
@@ -3086,6 +3256,10 @@
 		ev = tsc_event_table;
 		count = PMC_EVENT_TABLE_SIZE(tsc);
 		break;
+	case PMC_CLASS_PT:
+		ev = pt_event_table;
+		count = PMC_EVENT_TABLE_SIZE(pt);
+		break;
 	case PMC_CLASS_K7:
 		ev = k7_event_table;
 		count = PMC_EVENT_TABLE_SIZE(k7);
@@ -3136,6 +3310,10 @@
 			break;
 		}
 		break;
+	case PMC_CLASS_CORESIGHT:
+		ev = coresight_event_table;
+		count = PMC_EVENT_TABLE_SIZE(coresight);
+		break;
 	case PMC_CLASS_MIPS24K:
 		ev = mips24k_event_table;
 		count = PMC_EVENT_TABLE_SIZE(mips24k);
@@ -3184,12 +3362,14 @@
 int
 pmc_flush_logfile(void)
 {
+
 	return (PMC_CALL(FLUSHLOG,0));
 }
 
 int
 pmc_close_logfile(void)
 {
+
 	return (PMC_CALL(CLOSELOG,0));
 }
 
@@ -3406,6 +3586,12 @@
 	case PMC_CPU_INTEL_SKYLAKE_XEON:
 		PMC_MDEP_INIT_INTEL_V2(skylake_xeon);
 		break;
+	case PMC_CPU_INTEL_KABYLAKE:
+#if defined(__amd64__)
+		pmc_class_table[n++] = &pt_class_table_descr;
+#endif
+		PMC_MDEP_INIT_INTEL_V2(kabylake);
+		break;
 	case PMC_CPU_INTEL_IVYBRIDGE:
 		PMC_MDEP_INIT_INTEL_V2(ivybridge);
 		break;
@@ -3455,11 +3641,13 @@
 #if defined(__aarch64__)
 	case PMC_CPU_ARMV8_CORTEX_A53:
 		PMC_MDEP_INIT(cortex_a53);
-		pmc_class_table[n] = &cortex_a53_class_table_descr;
+		pmc_class_table[n++] = &cortex_a53_class_table_descr;
+		pmc_class_table[n++] = &coresight_class_table_descr;
 		break;
 	case PMC_CPU_ARMV8_CORTEX_A57:
 		PMC_MDEP_INIT(cortex_a57);
-		pmc_class_table[n] = &cortex_a57_class_table_descr;
+		pmc_class_table[n++] = &cortex_a57_class_table_descr;
+		pmc_class_table[n++] = &coresight_class_table_descr;
 		break;
 #endif
 #if defined(__mips__)
@@ -3623,6 +3811,11 @@
 			evfence = skylake_xeon_event_table +
 			    PMC_EVENT_TABLE_SIZE(skylake_xeon);
 			break;
+		case PMC_CPU_INTEL_KABYLAKE:
+			ev = kabylake_event_table;
+			evfence = kabylake_event_table +
+			    PMC_EVENT_TABLE_SIZE(kabylake);
+			break;
 		case PMC_CPU_INTEL_IVYBRIDGE:
 			ev = ivybridge_event_table;
 			evfence = ivybridge_event_table + PMC_EVENT_TABLE_SIZE(ivybridge);
@@ -3715,6 +3908,9 @@
 		default:	/* Unknown CPU type. */
 			break;
 		}
+	} else if (pe == PMC_EV_CORESIGHT_CORESIGHT) {
+		ev = coresight_event_table;
+		evfence = coresight_event_table + PMC_EVENT_TABLE_SIZE(coresight);
 	} else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) {
 		ev = mips24k_event_table;
 		evfence = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k);
@@ -3736,6 +3932,9 @@
 	} else if (pe == PMC_EV_TSC_TSC) {
 		ev = tsc_event_table;
 		evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc);
+	} else if (pe == PMC_EV_PT_PT) {
+		ev = pt_event_table;
+		evfence = pt_event_table + PMC_EVENT_TABLE_SIZE(pt);
 	} else if ((int)pe >= PMC_EV_SOFT_FIRST && (int)pe <= PMC_EV_SOFT_LAST) {
 		ev = soft_event_table;
 		evfence = soft_event_table + soft_event_info.pm_nevent;
@@ -3852,6 +4051,68 @@
 	return (0);
 }
 
+int
+pmc_proc_unsuspend(pmc_id_t pmc, pid_t pid)
+{
+	struct pmc_op_proc_unsuspend u;
+
+	u.pm_pmcid = pmc;
+	u.pm_pid = pid;
+
+	return (PMC_CALL(THREAD_UNSUSPEND, &u));
+}
+
+int
+pmc_read_trace(uint32_t cpu, pmc_id_t pmc,
+    pmc_value_t *cycle, pmc_value_t *offset)
+{
+	struct pmc_op_trace_read pmc_trace_read;
+
+	pmc_trace_read.pm_pmcid = pmc;
+	pmc_trace_read.pm_cpu = cpu;
+	pmc_trace_read.pm_cycle = 0;
+	pmc_trace_read.pm_offset = 0;
+
+	if (PMC_CALL(TRACE_READ, &pmc_trace_read) < 0)
+		return (-1);
+
+	*cycle = pmc_trace_read.pm_cycle;
+	*offset = pmc_trace_read.pm_offset;
+
+	return (0);
+}
+
+int
+pmc_trace_config(uint32_t cpu, pmc_id_t pmc,
+    uint64_t *ranges, uint32_t nranges)
+{
+	struct pmc_op_trace_config trc;
+
+	trc.pm_pmcid = pmc;
+	trc.pm_cpu = cpu;
+	trc.nranges = nranges;
+
+	if (nranges > PMC_FILTER_MAX_IP_RANGES)
+		return (-1);
+
+	memcpy(&trc.ranges, ranges, sizeof(uint64_t) * 2 * nranges);
+
+	if (PMC_CALL(TRACE_CONFIG, &trc) < 0)
+		return (-1);
+
+	return (0);
+}
+
+int
+pmc_log_kmap(pmc_id_t pmc)
+{
+	struct pmc_op_simple pmc_log_km;
+
+	pmc_log_km.pm_pmcid = pmc;
+
+	return (PMC_CALL(LOG_KERNEL_MAP, &pmc_log_km));
+}
+
 int
 pmc_release(pmc_id_t pmc)
 {
Index: lib/libpmc/pmc.h
===================================================================
--- lib/libpmc/pmc.h
+++ lib/libpmc/pmc.h
@@ -77,6 +77,7 @@
 int	pmc_allocate(const char *_ctrspec, enum pmc_mode _mode, uint32_t _flags,
     int _cpu, pmc_id_t *_pmcid);
 int	pmc_attach(pmc_id_t _pmcid, pid_t _pid);
+int	pmc_proc_unsuspend(pmc_id_t pmc, pid_t pid);
 int	pmc_capabilities(pmc_id_t _pmc, uint32_t *_caps);
 int	pmc_configure_logfile(int _fd);
 int	pmc_flush_logfile(void);
@@ -88,7 +89,10 @@
 int	pmc_get_msr(pmc_id_t _pmc, uint32_t *_msr);
 int	pmc_init(void);
 int	pmc_read(pmc_id_t _pmc, pmc_value_t *_value);
+int	pmc_read_trace(uint32_t cpu, pmc_id_t pmc, pmc_value_t *cycle, pmc_value_t *offset);
+int	pmc_trace_config(uint32_t cpu, pmc_id_t pmc, uint64_t *ranges, uint32_t nranges);
 int	pmc_release(pmc_id_t _pmc);
+int	pmc_log_kmap(pmc_id_t pmc);
 int	pmc_rw(pmc_id_t _pmc, pmc_value_t _newvalue, pmc_value_t *_oldvalue);
 int	pmc_set(pmc_id_t _pmc, pmc_value_t _value);
 int	pmc_start(pmc_id_t _pmc);
Index: lib/libpmcstat/libpmcstat_image.c
===================================================================
--- lib/libpmcstat/libpmcstat_image.c
+++ lib/libpmcstat/libpmcstat_image.c
@@ -386,6 +386,21 @@
 				break;
 			}
 		}
+	} else if (eh.e_type == ET_DYN) {
+		for (i = 0; i < eh.e_phnum; i++) {
+			if (gelf_getphdr(e, i, &ph) != &ph) {
+				warnx(
+"WARNING: Retrieval of PHDR entry #%ju in \"%s\" failed: %s.",
+				    (uintmax_t) i, buffer, elf_errmsg(-1));
+				goto done;
+			}
+			switch (ph.p_type) {
+			case PT_LOAD:
+				if ((ph.p_flags & PF_X) != 0)
+					image->pi_vaddr = ph.p_vaddr & (-ph.p_align);
+				break;
+			}
+		}
 	}
 
 	/*
Index: sys/amd64/include/pmc_mdep.h
===================================================================
--- sys/amd64/include/pmc_mdep.h
+++ sys/amd64/include/pmc_mdep.h
@@ -45,6 +45,7 @@
 #include <dev/hwpmc/hwpmc_core.h>
 #include <dev/hwpmc/hwpmc_piv.h>
 #include <dev/hwpmc/hwpmc_tsc.h>
+#include <dev/hwpmc/hwpmc_pt.h>
 #include <dev/hwpmc/hwpmc_uncore.h>
 
 /*
@@ -57,6 +58,7 @@
 #define	PMC_MDEP_CLASS_INDEX_P4		2
 #define	PMC_MDEP_CLASS_INDEX_IAP	2
 #define	PMC_MDEP_CLASS_INDEX_IAF	3
+#define	PMC_MDEP_CLASS_INDEX_PT		4
 #define	PMC_MDEP_CLASS_INDEX_UCP	4
 #define	PMC_MDEP_CLASS_INDEX_UCF	5
 
@@ -70,6 +72,7 @@
  * IAF		Intel fixed-function PMCs in Core2 and later CPUs.
  * UCP		Intel Uncore programmable PMCs.
  * UCF		Intel Uncore fixed-function PMCs.
+ * PT		Intel PT event.
  */
 
 union pmc_md_op_pmcallocate  {
@@ -79,7 +82,8 @@
 	struct pmc_md_ucf_op_pmcallocate	pm_ucf;
 	struct pmc_md_ucp_op_pmcallocate	pm_ucp;
 	struct pmc_md_p4_op_pmcallocate		pm_p4;
-	uint64_t				__pad[4];
+	struct pmc_md_pt_op_pmcallocate		pm_pt;
+	uint64_t				__pad[1];
 };
 
 /* Logging */
@@ -95,6 +99,7 @@
 	struct pmc_md_ucf_pmc	pm_ucf;
 	struct pmc_md_ucp_pmc	pm_ucp;
 	struct pmc_md_p4_pmc	pm_p4;
+	struct pmc_md_pt_pmc	pm_pt;
 };
 
 #define	PMC_TRAPFRAME_TO_PC(TF)	((TF)->tf_rip)
Index: sys/arm64/coresight/coresight-cmd.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-cmd.c
@@ -0,0 +1,137 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <arm64/coresight/coresight.h>
+
+#include "coresight_if.h"
+
+extern struct coresight_device_list cs_devs;
+
+static struct coresight_device *
+coresight_next_device(struct coresight_device *cs_dev,
+    struct coresight_event *event)
+{
+	struct coresight_device *out;
+	struct endpoint *out_endp;
+	struct endpoint *endp;
+
+	TAILQ_FOREACH(endp, &cs_dev->pdata->endpoints, link) {
+		if (endp->slave != 0)
+			continue;
+
+		out = coresight_get_output_device(endp, &out_endp);
+		if (out) {
+			if (LIST_EMPTY(&event->endplist)) {
+				/* Add source device */
+				endp->cs_dev = cs_dev;
+				LIST_INSERT_HEAD(&event->endplist, endp,
+				    endplink);
+			}
+
+			/* Add output device */
+			out_endp->cs_dev = out;
+			LIST_INSERT_HEAD(&event->endplist, out_endp, endplink);
+
+			return (out);
+		}
+	}
+
+	return (NULL);
+}
+
+static int
+coresight_build_list(struct coresight_device *cs_dev,
+    struct coresight_event *event)
+{
+	struct coresight_device *out;
+
+	out = cs_dev;
+	while (out)
+		out = coresight_next_device(out, event);
+
+	return (0);
+}
+
+int
+coresight_init_event(int cpu, struct coresight_event *event)
+{
+	struct coresight_device *cs_dev;
+
+	/* Start building path from source device */
+	TAILQ_FOREACH(cs_dev, &cs_devs, link) {
+		if (cs_dev->dev_type == event->src &&
+		    cs_dev->pdata->cpu == cpu) {
+			LIST_INIT(&event->endplist);
+			coresight_build_list(cs_dev, event);
+			break;
+		}
+	}
+
+	return (0);
+}
+
+void
+coresight_enable(int cpu, struct coresight_event *event)
+{
+
+	struct endpoint *endp;
+
+	LIST_FOREACH(endp, &event->endplist, endplink)
+		CORESIGHT_ENABLE(endp->cs_dev->dev, endp, event);
+}
+
+void
+coresight_disable(int cpu, struct coresight_event *event)
+{
+	struct endpoint *endp;
+
+	LIST_FOREACH(endp, &event->endplist, endplink)
+		CORESIGHT_DISABLE(endp->cs_dev->dev, endp, event);
+}
+
+void
+coresight_read(int cpu, struct coresight_event *event)
+{
+	struct endpoint *endp;
+
+	LIST_FOREACH(endp, &event->endplist, endplink)
+		CORESIGHT_READ(endp->cs_dev->dev, endp, event);
+}
Index: sys/arm64/coresight/coresight-cpu-debug.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-cpu-debug.c
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <arm64/coresight/coresight.h>
+
+#define	EDPCSR				0x0a0
+#define	EDCIDSR				0x0a4
+#define	EDVIDSR				0x0a8
+#define	EDPCSR_HI			0x0ac
+#define	EDOSLAR				0x300
+#define	EDPRCR				0x310
+#define	 EDPRCR_COREPURQ		(1 << 3)
+#define	 EDPRCR_CORENPDRQ		(1 << 0)
+#define	EDPRSR				0x314
+#define	EDDEVID1			0xfc4
+#define	EDDEVID				0xfc8
+
+static struct ofw_compat_data compat_data[] = {
+	{ "arm,coresight-cpu-debug",		1 },
+	{ NULL,					0 }
+};
+
+struct debug_softc {
+	struct resource		*res;
+};
+
+static struct resource_spec debug_spec[] = {
+	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
+	{ -1, 0 }
+};
+
+static int
+debug_probe(device_t dev)
+{
+
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Coresight CPU Debug");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+debug_attach(device_t dev)
+{
+	struct debug_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	if (bus_alloc_resources(dev, debug_spec, &sc->res) != 0) {
+		device_printf(dev, "cannot allocate resources for device\n");
+		return (ENXIO);
+	}
+
+	/* Enable CPU debug for current CPU only */
+	if (device_get_unit(dev) != 0)
+		return (0);
+
+	/* Unlock Coresight */
+	bus_write_4(sc->res, CORESIGHT_LAR, CORESIGHT_UNLOCK);
+
+	wmb();
+
+	/* Unlock Debug */
+	bus_write_4(sc->res, EDOSLAR, 0);
+
+	wmb();
+
+	/* Request core power-up via EDPRCR.COREPURQ */
+	reg = bus_read_4(sc->res, EDPRCR);
+	reg |= EDPRCR_COREPURQ;
+	bus_write_4(sc->res, EDPRCR, reg);
+
+	do { /* NOTE(review): unbounded poll -- hangs forever if power-up never completes */
+		reg = bus_read_4(sc->res, EDPRSR);
+	} while ((reg & EDPRCR_CORENPDRQ) == 0); /* NOTE(review): polls EDPRSR with an EDPRCR bit name (both bit 0) -- confirm EDPRSR bit 0 is the intended status */
+
+	return (0);
+}
+
+static device_method_t debug_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		debug_probe),
+	DEVMETHOD(device_attach,	debug_attach),
+	DEVMETHOD_END
+};
+
+static driver_t debug_driver = {
+	"debug",
+	debug_methods,
+	sizeof(struct debug_softc),
+};
+
+static devclass_t debug_devclass;
+
+EARLY_DRIVER_MODULE(debug, simplebus, debug_driver, debug_devclass,
+    0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_LATE);
+MODULE_VERSION(debug, 1);
Index: sys/arm64/coresight/coresight-dynamic-replicator.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-dynamic-replicator.c
@@ -0,0 +1,165 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <arm64/coresight/coresight.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include "coresight_if.h"
+
+#define	REPLICATOR_IDFILTER0	0x00 /* Trace ID filter, output port 0 */
+#define	REPLICATOR_IDFILTER1	0x04 /* Trace ID filter, output port 1 */
+
+/* Device-tree match table; ocd_data != 0 means "supported". */
+static struct ofw_compat_data compat_data[] = {
+	{ "arm,coresight-dynamic-replicator",	1 },
+	{ NULL,					0 }
+};
+
+/* Per-instance software state. */
+struct replicator_softc {
+	struct resource			*res;	/* Replicator register block */
+	struct coresight_platform_data	*pdata;	/* Topology from device tree */
+};
+
+/* Single memory resource: the replicator programming interface. */
+static struct resource_spec replicator_spec[] = {
+	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
+	{ -1, 0 }
+};
+
+/*
+ * Coresight interface: pass trace through the output port selected by
+ * the endpoint (endp->reg) and block the other one.
+ */
+static int
+replicator_enable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct replicator_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * Enable the port, keep the other port disabled.  Writing 0x00
+	 * to an ID filter passes trace IDs on that port; 0xff discards
+	 * them.
+	 */
+
+	if (endp->reg == 0) {
+		bus_write_4(sc->res, REPLICATOR_IDFILTER0, 0x00);
+		bus_write_4(sc->res, REPLICATOR_IDFILTER1, 0xff);
+	} else {
+		bus_write_4(sc->res, REPLICATOR_IDFILTER0, 0xff);
+		bus_write_4(sc->res, REPLICATOR_IDFILTER1, 0x00);
+	}
+
+	return (0);
+}
+
+/*
+ * Coresight interface: block trace on both output ports.
+ */
+static void
+replicator_disable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct replicator_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	/* 0xff in both ID filters discards trace on both ports. */
+	bus_write_4(sc->res, REPLICATOR_IDFILTER0, 0xff);
+	bus_write_4(sc->res, REPLICATOR_IDFILTER1, 0xff);
+}
+
+/*
+ * Newbus probe: match an enabled device-tree node against compat_data.
+ */
+static int
+replicator_probe(device_t dev)
+{
+
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Coresight Dynamic Replicator");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Newbus attach: map registers, register with the coresight framework
+ * and unlock the component so its registers are writable.
+ */
+static int
+replicator_attach(device_t dev)
+{
+	struct replicator_softc *sc;
+	struct coresight_desc desc;
+
+	sc = device_get_softc(dev);
+
+	if (bus_alloc_resources(dev, replicator_spec, &sc->res) != 0) {
+		device_printf(dev, "cannot allocate resources for device\n");
+		return (ENXIO);
+	}
+
+	sc->pdata = coresight_get_platform_data(dev);
+
+	/*
+	 * NOTE(review): desc is stack-allocated and only three fields are
+	 * set; confirm struct coresight_desc has no other members that
+	 * coresight_register() consumes.
+	 */
+	desc.pdata = sc->pdata;
+	desc.dev = dev;
+	desc.dev_type = CORESIGHT_DYNAMIC_REPLICATOR;
+	coresight_register(&desc);
+
+	/* Unlock Coresight */
+	bus_write_4(sc->res, CORESIGHT_LAR, CORESIGHT_UNLOCK);
+
+	wmb();
+
+	return (0);
+}
+
+static device_method_t replicator_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		replicator_probe),
+	DEVMETHOD(device_attach,	replicator_attach),
+
+	/* Coresight interface */
+	DEVMETHOD(coresight_enable,	replicator_enable),
+	DEVMETHOD(coresight_disable,	replicator_disable),
+	DEVMETHOD_END
+};
+
+static driver_t replicator_driver = {
+	"replicator",
+	replicator_methods,
+	sizeof(struct replicator_softc),
+};
+
+static devclass_t replicator_devclass;
+
+/* Attach to simplebus in the regular driver pass. */
+DRIVER_MODULE(replicator, simplebus, replicator_driver, replicator_devclass,
+    0, 0);
+MODULE_VERSION(replicator, 1);
Index: sys/arm64/coresight/coresight-etm4x.h
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-etm4x.h
@@ -0,0 +1,170 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_ARM64_CORESIGHT_ETM4X_H_
+#define	_ARM64_CORESIGHT_ETM4X_H_
+
+/* ETMv4 (Embedded Trace Macrocell v4) register map. */
+#define	TRCPRGCTLR		0x004 /* Trace Programming Control Register */
+#define	 TRCPRGCTLR_EN		(1 << 0) /* Trace unit enable bit */
+#define	TRCPROCSELR		0x008 /* Trace PE Select Control Register */
+#define	TRCSTATR		0x00C /* Trace Status Register */
+#define	 TRCSTATR_PMSTABLE	(1 << 1) /* The programmers' model is stable. */
+#define	 TRCSTATR_IDLE		(1 << 0) /* The trace unit is idle. */
+#define	TRCCONFIGR		0x010 /* Trace Configuration Register */
+#define	 TRCCONFIGR_DV		(1 << 17) /* Data value tracing is enabled when INSTP0 is not 0b00 */
+#define	 TRCCONFIGR_DA		(1 << 16) /* Data address tracing is enabled when INSTP0 is not 0b00. */
+#define	 TRCCONFIGR_VMIDOPT	(1 << 15) /* Control bit to configure the Virtual context identifier value */
+#define	 TRCCONFIGR_QE_S	13 /* Q element enable field */
+#define	 TRCCONFIGR_QE_M	(0x3 << TRCCONFIGR_QE_S)
+#define	 TRCCONFIGR_RS		(1 << 12) /* Return stack enable bit */
+#define	 TRCCONFIGR_TS		(1 << 11) /* Global timestamp tracing is enabled. */
+#define	 TRCCONFIGR_COND_S	8 /* Conditional instruction tracing bit. */
+#define	 TRCCONFIGR_COND_M	(0x7 << TRCCONFIGR_COND_S)
+#define	 TRCCONFIGR_COND_DIS	0
+#define	 TRCCONFIGR_COND_LDR	(1 << TRCCONFIGR_COND_S) /* Conditional load instructions are traced. */
+#define	 TRCCONFIGR_COND_STR	(2 << TRCCONFIGR_COND_S) /* Conditional store instructions are traced. */
+#define	 TRCCONFIGR_COND_LDRSTR	(3 << TRCCONFIGR_COND_S) /* Conditional load and store instructions are traced. */
+#define	 TRCCONFIGR_COND_ALL	(7 << TRCCONFIGR_COND_S) /* All conditional instructions are traced. */
+#define	 TRCCONFIGR_VMID	(1 << 7) /* Virtual context identifier tracing is enabled. */
+#define	 TRCCONFIGR_CID		(1 << 6) /* Context ID tracing is enabled. */
+#define	 TRCCONFIGR_CCI		(1 << 4) /* Cycle counting in the instruction trace is enabled. */
+#define	 TRCCONFIGR_BB		(1 << 3) /* Branch broadcast mode is enabled. */
+#define	 TRCCONFIGR_INSTP0_S	1 /* Instruction P0 field. */
+#define	 TRCCONFIGR_INSTP0_M	(0x3 << TRCCONFIGR_INSTP0_S)
+#define	 TRCCONFIGR_INSTP0_NONE	0 /* Do not trace load and store instructions as P0 instructions. */
+#define	 TRCCONFIGR_INSTP0_LDR	(1 << TRCCONFIGR_INSTP0_S) /* Trace load instructions as P0 instructions. */
+#define	 TRCCONFIGR_INSTP0_STR	(2 << TRCCONFIGR_INSTP0_S) /* Trace store instructions as P0 instructions. */
+#define	 TRCCONFIGR_INSTP0_LDRSTR (3 << TRCCONFIGR_INSTP0_S) /* Trace load and store instructions as P0 instr. */
+#define	TRCAUXCTLR		0x018 /* Trace Auxiliary Control Register */
+#define	TRCEVENTCTL0R		0x020 /* Trace Event Control 0 Register */
+#define	TRCEVENTCTL1R		0x024 /* Trace Event Control 1 Register */
+#define	TRCSTALLCTLR		0x02C /* Trace Stall Control Register */
+#define	TRCTSCTLR		0x030 /* Trace Global Timestamp Control Register */
+#define	TRCSYNCPR		0x034 /* Trace Synchronization Period Register */
+#define	TRCCCCTLR		0x038 /* Trace Cycle Count Control Register */
+#define	TRCBBCTLR		0x03C /* Trace Branch Broadcast Control Register */
+#define	TRCTRACEIDR		0x040 /* Trace ID Register */
+#define	TRCQCTLR		0x044 /* Trace Q Element Control Register */
+#define	 TRCQCTLR_MODE_INC	(1 << 8) /* Include mode. */
+#define	TRCVICTLR		0x080 /* Trace ViewInst Main Control Register */
+#define	 TRCVICTLR_SSSTATUS	(1 << 9) /* The start/stop logic is in the started state. */
+#define	 TRCVICTLR_EXLEVEL_NS_S	20
+#define	 TRCVICTLR_EXLEVEL_NS_M	(0xf << TRCVICTLR_EXLEVEL_NS_S)
+#define	 TRCVICTLR_EXLEVEL_NS(n) (0x1 << ((n) + TRCVICTLR_EXLEVEL_NS_S))
+#define	 TRCVICTLR_EXLEVEL_S_S	16
+#define	 TRCVICTLR_EXLEVEL_S_M	(0xf << TRCVICTLR_EXLEVEL_S_S)
+#define	 TRCVICTLR_EXLEVEL_S(n)	(0x1 << ((n) + TRCVICTLR_EXLEVEL_S_S))
+#define	 EVENT_SEL_S		0
+#define	 EVENT_SEL_M		(0x1f << EVENT_SEL_S)
+#define	TRCVIIECTLR		0x084 /* Trace ViewInst Include/Exclude Control Register */
+#define	 TRCVIIECTLR_INCLUDE_S	0
+#define	TRCVISSCTLR		0x088 /* Trace ViewInst Start/Stop Control Register */
+#define	TRCVIPCSSCTLR		0x08C /* Trace ViewInst Start/Stop PE Comparator Control Register */
+#define	TRCVDCTLR		0x0A0 /* Trace ViewData Main Control Register */
+#define	 TRCVDCTLR_TRCEXDATA	(1 << 12) /* Exception and exception return data transfers are traced */
+#define	 TRCVDCTLR_TBI		(1 << 11) /* The trace unit assigns bits[63:56] to have the same value as bits[63:56] of the data address. */
+#define	 TRCVDCTLR_PCREL	(1 << 10) /* The trace unit does not trace the address or value portions of PC-relative transfers. */
+#define	 TRCVDCTLR_SPREL_S	8
+#define	 TRCVDCTLR_SPREL_M	(0x3 << TRCVDCTLR_SPREL_S)
+#define	 TRCVDCTLR_EVENT_S	0
+#define	 TRCVDCTLR_EVENT_M	(0xff << TRCVDCTLR_EVENT_S)
+#define	TRCVDSACCTLR		0x0A4 /* Trace ViewData Include/Exclude Single Address Comparator Control Register */
+#define	TRCVDARCCTLR		0x0A8 /* Trace ViewData Include/Exclude Address Range Comparator Control Register */
+#define	TRCSEQEVR(n)		(0x100 + (n) * 0x4)	/* Trace Sequencer State Transition Control Register [n=0-2] */
+#define	TRCSEQRSTEVR		0x118 /* Trace Sequencer Reset Control Register */
+#define	TRCSEQSTR		0x11C /* Trace Sequencer State Register */
+#define	TRCEXTINSELR		0x120 /* Trace External Input Select Register */
+#define	TRCCNTRLDVR(n)		(0x140 + (n) * 0x4) /* 32 Trace Counter Reload Value Register [n=0-3] */
+#define	TRCCNTCTLR(n)		(0x150 + (n) * 0x4) /* 32 Trace Counter Control Register [n=0-3] */
+#define	TRCCNTVR(n)		(0x160 + (n) * 0x4) /* 32 Trace Counter Value Register [n=0-3] */
+#define	TRCIMSPEC(n)		(0x1C0 + (n) * 0x4)	/* Trace IMPLEMENTATION DEFINED register [n=0-7] */
+
+/*
+ * Trace ID Registers.  TRCIDR0-TRCIDR7 live at 0x1E0-0x1FC; TRCIDR8
+ * onwards start over at 0x180, so the index must be rebased by 8.
+ */
+#define	TRCIDR0(n)		(0x1E0 + 0x4 * (n))	  /* TRCIDRn [n=0-7] */
+#define	TRCIDR8(n)		(0x180 + 0x4 * ((n) - 8)) /* TRCIDRn [n=8-13] */
+#define	TRCIDR(n)		(((n) > 7) ? TRCIDR8(n) : TRCIDR0(n))
+#define	 TRCIDR1_TRCARCHMAJ_S	8
+#define	 TRCIDR1_TRCARCHMAJ_M	(0xf << TRCIDR1_TRCARCHMAJ_S)
+#define	 TRCIDR1_TRCARCHMIN_S	4
+#define	 TRCIDR1_TRCARCHMIN_M	(0xf << TRCIDR1_TRCARCHMIN_S)
+
+#define	TRCRSCTLR(n)		(0x200 + (n) * 0x4) /* Trace Resource Selection Control Register [n=2-31] */
+#define	TRCSSCCR(n)		(0x280 + (n) * 0x4) /* Trace Single-shot Comparator Control Register [n=0-7] */
+#define	TRCSSCSR(n)		(0x2A0 + (n) * 0x4) /* Trace Single-shot Comparator Status Register [n=0-7] */
+#define	TRCSSPCICR(n)		(0x2C0 + (n) * 0x4) /* Trace Single-shot PE Comparator Input Control [n=0-7] */
+#define	TRCOSLAR		0x300 /* Management OS Lock Access Register */
+#define	TRCOSLSR		0x304 /* Management OS Lock Status Register */
+#define	TRCPDCR			0x310 /* Management PowerDown Control Register */
+#define	TRCPDSR			0x314 /* Management PowerDown Status Register */
+#define	TRCACVR(n)		(0x400 + (n) * 0x8) /* Trace Address Comparator Value Register [n=0-15] */
+#define	TRCACATR(n)		(0x480 + (n) * 0x8) /* Trace Address Comparator Access Type Register [n=0-15] */
+#define	 TRCACATR_DTBM		(1 << 21)
+#define	 TRCACATR_DATARANGE	(1 << 20)
+#define	 TRCACATR_DATASIZE_S	18
+#define	 TRCACATR_DATASIZE_M	(0x3 << TRCACATR_DATASIZE_S)
+#define	 TRCACATR_DATASIZE_B	(0x0 << TRCACATR_DATASIZE_S)
+#define	 TRCACATR_DATASIZE_HW	(0x1 << TRCACATR_DATASIZE_S)
+#define	 TRCACATR_DATASIZE_W	(0x2 << TRCACATR_DATASIZE_S)
+#define	 TRCACATR_DATASIZE_DW	(0x3 << TRCACATR_DATASIZE_S)
+#define	 TRCACATR_DATAMATCH_S	16
+#define	 TRCACATR_DATAMATCH_M	(0x3 << TRCACATR_DATAMATCH_S)
+#define	 TRCACATR_EXLEVEL_S_S	8
+#define	 TRCACATR_EXLEVEL_S_M	(0xf << TRCACATR_EXLEVEL_S_S)
+#define	 TRCACATR_EXLEVEL_S(n)	(0x1 << ((n) + TRCACATR_EXLEVEL_S_S))
+#define	 TRCACATR_EXLEVEL_NS_S	12
+#define	 TRCACATR_EXLEVEL_NS_M	(0xf << TRCACATR_EXLEVEL_NS_S)
+#define	 TRCACATR_EXLEVEL_NS(n)	(0x1 << ((n) + TRCACATR_EXLEVEL_NS_S))
+#define	TRCDVCVR(n)		(0x500 + (n) * 0x8) /* Trace Data Value Comparator Value Register [n=0-7] */
+#define	TRCDVCMR(n)		(0x580 + (n) * 0x8) /* Trace Data Value Comparator Mask Register [n=0-7] */
+#define	TRCCIDCVR(n)		(0x600 + (n) * 0x8) /* Trace Context ID Comparator Value Register [n=0-7] */
+#define	TRCVMIDCVR(n)		(0x640 + (n) * 0x8) /* Trace Virtual context identifier Comparator Value [n=0-7] */
+#define	TRCCIDCCTLR0		0x680 /* Trace Context ID Comparator Control Register 0 */
+#define	TRCCIDCCTLR1		0x684 /* Trace Context ID Comparator Control Register 1 */
+#define	TRCVMIDCCTLR0		0x688 /* Trace Virtual context identifier Comparator Control Register 0 */
+#define	TRCVMIDCCTLR1		0x68C /* Trace Virtual context identifier Comparator Control Register 1 */
+#define	TRCITCTRL		0xF00 /* Management Integration Mode Control register */
+#define	TRCCLAIMSET		0xFA0 /* Trace Claim Tag Set register */
+#define	TRCCLAIMCLR		0xFA4 /* Trace Claim Tag Clear register */
+#define	TRCDEVAFF0		0xFA8 /* Management Device Affinity register 0 */
+#define	TRCDEVAFF1		0xFAC /* Management Device Affinity register 1 */
+#define	TRCLAR			0xFB0 /* Management Software Lock Access Register */
+#define	TRCLSR			0xFB4 /* Management Software Lock Status Register */
+#define	TRCAUTHSTATUS		0xFB8 /* Management Authentication Status register */
+#define	TRCDEVARCH		0xFBC /* Management Device Architecture register */
+#define	TRCDEVID		0xFC8 /* Management Device ID register */
+#define	TRCDEVTYPE		0xFCC /* Management Device Type register */
+#define	TRCPIDR4		0xFD0 /* Management Peripheral ID4 Register */
+#define	TRCPIDR(n)		(0xFE0 + (n) * 0x4)	/* Management Peripheral IDn Register [n=0-3] */
+#define	TRCPIDR567(n)		(0xFD4 + ((n) - 5) * 0x4) /*  Management Peripheral ID5 to Peripheral ID7 Registers */
+#define	TRCCIDR(n)		(0xFF0 + (n) * 0x4)	/* Management Component IDn Register [n=0-4] */
+
+#endif /* !_ARM64_CORESIGHT_ETM4X_H_ */
Index: sys/arm64/coresight/coresight-etm4x.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-etm4x.c
@@ -0,0 +1,327 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <arm64/coresight/coresight.h>
+#include <arm64/coresight/coresight-etm4x.h>
+
+#include "coresight_if.h"
+
+/* Compile-time debug toggle: define ETM_DEBUG to enable dprintf(). */
+#define	ETM_DEBUG
+#undef ETM_DEBUG
+   
+#ifdef ETM_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+/*
+ * Typical trace flow:
+ *
+ * CPU0 -> ETM0 -> funnel1 -> funnel0 -> ETF -> replicator -> ETR -> DRAM
+ * CPU1 -> ETM1 -> funnel1 -^
+ * CPU2 -> ETM2 -> funnel1 -^
+ * CPU3 -> ETM3 -> funnel1 -^
+ */
+
+/* Device-tree match table; ocd_data != 0 means "supported". */
+static struct ofw_compat_data compat_data[] = {
+	{ "arm,coresight-etm4x",		1 },
+	{ NULL,					0 }
+};
+
+/* Per-instance software state. */
+struct etm_softc {
+	struct resource			*res;	/* ETM register block */
+	struct coresight_platform_data	*pdata;	/* Topology from device tree */
+};
+
+/* Single memory resource: the ETMv4 programming interface. */
+static struct resource_spec etm_spec[] = {
+	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
+	{ -1, 0 }
+};
+
+/*
+ * Unlock the Coresight software lock and the ETM OS lock so the trace
+ * registers become writable.
+ */
+static void
+etm_unlock(struct etm_softc *sc)
+{
+
+	/* Unlocking Coresight (software lock, via LAR) */
+	bus_write_4(sc->res, CORESIGHT_LAR, CORESIGHT_UNLOCK);
+
+	isb();
+
+	/* Unlocking ETM (clear the OS lock via TRCOSLAR) */
+	bus_write_4(sc->res, TRCOSLAR, 0);
+
+	isb();
+}
+
+/*
+ * Enable the trace unit and wait for it to leave the idle state.
+ * Panics if the enable bit did not stick.
+ */
+static int
+etm_start(device_t dev)
+{
+	struct etm_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	/* Enable the trace unit */
+	bus_write_4(sc->res, TRCPRGCTLR, TRCPRGCTLR_EN);
+
+	/*
+	 * Wait for the IDLE bit to clear.  Test the field as a mask
+	 * rather than comparing against 1, so the check stays correct
+	 * regardless of the bit's position.
+	 */
+	do {
+		reg = bus_read_4(sc->res, TRCSTATR);
+	} while ((reg & TRCSTATR_IDLE) != 0);
+
+	if ((bus_read_4(sc->res, TRCPRGCTLR) & TRCPRGCTLR_EN) == 0)
+		panic("etm is not enabled\n");
+
+	return (0);
+}
+
+/*
+ * Disable the trace unit and wait for it to return to the idle state.
+ */
+static int
+etm_stop(device_t dev)
+{
+	struct etm_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	/* Disable the trace unit */
+	bus_write_4(sc->res, TRCPRGCTLR, 0);
+
+	/* Wait for the IDLE bit to be set (unit back to idle). */
+	do {
+		reg = bus_read_4(sc->res, TRCSTATR);
+	} while ((reg & TRCSTATR_IDLE) == 0);
+
+	return (0);
+}
+
+/*
+ * Program the trace unit from a coresight_event configuration: global
+ * trace options, sync period, trace ID, ViewInst filtering and the
+ * address-range comparators.  Returns 0 on success or -1 when the
+ * configured exception level is unsupported; callers must check the
+ * return value before starting the unit.
+ */
+static int
+etm_prepare(device_t dev, struct coresight_event *config)
+{
+	struct etm_softc *sc;
+	uint32_t reg;
+	int i;
+
+	sc = device_get_softc(dev);
+
+	/* Configure ETM */
+
+	/*
+	 * Enable the return stack, global timestamping,
+	 * Context ID, and Virtual context identifier tracing.
+	 */
+	reg = TRCCONFIGR_RS | TRCCONFIGR_TS;
+	reg |= TRCCONFIGR_CID | TRCCONFIGR_VMID;
+	reg |= TRCCONFIGR_INSTP0_LDRSTR;
+	reg |= TRCCONFIGR_COND_ALL;
+	bus_write_4(sc->res, TRCCONFIGR, reg);
+
+	/* Disable all event tracing. */
+	bus_write_4(sc->res, TRCEVENTCTL0R, 0);
+	bus_write_4(sc->res, TRCEVENTCTL1R, 0);
+
+	/* Disable stalling, if implemented. */
+	bus_write_4(sc->res, TRCSTALLCTLR, 0);
+
+	/* Enable trace synchronization every 4096 (2^12) bytes of trace. */
+	bus_write_4(sc->res, TRCSYNCPR, 0xC);
+
+	/* Set a value for the trace ID, with bit[0]=0. */
+	bus_write_4(sc->res, TRCTRACEIDR, 0x10);
+
+	/*
+	 * Disable the timestamp event. The trace unit still generates
+	 * timestamps due to other reasons such as trace synchronization.
+	 */
+	bus_write_4(sc->res, TRCTSCTLR, 0);
+
+	/*
+	 * Enable ViewInst to trace everything, with the start/stop
+	 * logic started.
+	 */
+	reg = TRCVICTLR_SSSTATUS;
+
+	/* The number of the single resource used to activate the event. */
+	reg |= (1 << EVENT_SEL_S);
+
+	/* Only exception levels 0-2 are supported. */
+	if (config->excp_level > 2)
+		return (-1);
+
+	/*
+	 * Restrict tracing to the requested exception level: set every
+	 * EXLEVEL bit, then clear the bit for the configured level.
+	 */
+	reg |= TRCVICTLR_EXLEVEL_NS_M;
+	reg &= ~TRCVICTLR_EXLEVEL_NS(config->excp_level);
+	reg |= TRCVICTLR_EXLEVEL_S_M;
+	reg &= ~TRCVICTLR_EXLEVEL_S(config->excp_level);
+	bus_write_4(sc->res, TRCVICTLR, reg);
+
+	/*
+	 * Program the address comparators; consecutive comparator pairs
+	 * form one include range, hence the i / 2 for the include bit.
+	 */
+	for (i = 0; i < config->naddr * 2; i++) {
+		dprintf("configure range %d, address %lx\n", i, config->addr[i]);
+		bus_write_8(sc->res, TRCACVR(i), config->addr[i]);
+
+		reg = 0;
+		/* Secure state */
+		reg |= TRCACATR_EXLEVEL_S_M;
+		reg &= ~TRCACATR_EXLEVEL_S(config->excp_level);
+		/* Non-secure state */
+		reg |= TRCACATR_EXLEVEL_NS_M;
+		reg &= ~TRCACATR_EXLEVEL_NS(config->excp_level);
+		bus_write_4(sc->res, TRCACATR(i), reg);
+
+		/* Address range is included */
+		reg = bus_read_4(sc->res, TRCVIIECTLR);
+		reg |= (1 << (TRCVIIECTLR_INCLUDE_S + i / 2));
+		bus_write_4(sc->res, TRCVIIECTLR, reg);
+	}
+
+	/* No address filtering for ViewData. */
+	bus_write_4(sc->res, TRCVDARCCTLR, 0);
+
+	/* Clear the STATUS bit of single-shot comparator 0 to zero. */
+	bus_write_4(sc->res, TRCSSCSR(0), 0);
+
+	if (config->naddr == 0) {
+		/* No address range filtering for ViewInst. */
+		bus_write_4(sc->res, TRCVIIECTLR, 0);
+	}
+
+	/* No start or stop points for ViewInst. */
+	bus_write_4(sc->res, TRCVISSCTLR, 0);
+
+	/* Disable ViewData */
+	bus_write_4(sc->res, TRCVDCTLR, 0);
+
+	/* No address filtering for ViewData. */
+	bus_write_4(sc->res, TRCVDSACCTLR, 0);
+
+	return (0);
+}
+
+/*
+ * Coresight interface: program the ETM from the event configuration
+ * and start tracing.  Returns non-zero without starting the unit if
+ * the configuration cannot be applied.
+ */
+static int
+etm_enable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	int error;
+
+	/*
+	 * etm_prepare() fails for unsupported exception levels; do not
+	 * start the trace unit with a partially-applied configuration.
+	 */
+	error = etm_prepare(dev, event);
+	if (error != 0)
+		return (error);
+	etm_start(dev);
+
+	return (0);
+}
+
+/*
+ * Coresight interface: stop tracing on this ETM.
+ */
+static void
+etm_disable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+
+	etm_stop(dev);
+}
+
+/*
+ * Newbus probe: match an enabled device-tree node against compat_data.
+ */
+static int
+etm_probe(device_t dev)
+{
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "AArch64 Embedded Trace Macrocell");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Newbus attach: map registers, report the ETM architecture version
+ * (unit 0 only) and register with the coresight framework.
+ */
+static int
+etm_attach(device_t dev)
+{
+	struct coresight_desc desc;
+	struct etm_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	if (bus_alloc_resources(dev, etm_spec, &sc->res) != 0) {
+		device_printf(dev, "cannot allocate resources for device\n");
+		return (ENXIO);
+	}
+
+	/* Unlock and read TRCIDR1 once, from the first unit only. */
+	if (device_get_unit(dev) == 0) {
+		/* TODO */
+		etm_unlock(sc);
+		reg = bus_read_4(sc->res, TRCIDR(1));
+		if (bootverbose)
+			printf("ETM Version: %d.%d\n",
+			    (reg & TRCIDR1_TRCARCHMAJ_M) >> TRCIDR1_TRCARCHMAJ_S,
+			    (reg & TRCIDR1_TRCARCHMIN_M) >> TRCIDR1_TRCARCHMIN_S);
+	}
+
+	sc->pdata = coresight_get_platform_data(dev);
+
+	/*
+	 * NOTE(review): desc is stack-allocated and only three fields are
+	 * set; confirm struct coresight_desc has no other members that
+	 * coresight_register() consumes.
+	 */
+	desc.pdata = sc->pdata;
+	desc.dev = dev;
+	desc.dev_type = CORESIGHT_ETMV4;
+	coresight_register(&desc);
+
+	return (0);
+}
+
+static device_method_t etm_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		etm_probe),
+	DEVMETHOD(device_attach,	etm_attach),
+
+	/* Coresight interface */
+	DEVMETHOD(coresight_enable,	etm_enable),
+	DEVMETHOD(coresight_disable,	etm_disable),
+	DEVMETHOD_END
+};
+
+static driver_t etm_driver = {
+	"etm",
+	etm_methods,
+	sizeof(struct etm_softc),
+};
+
+static devclass_t etm_devclass;
+
+/* Attach to simplebus in the regular driver pass. */
+DRIVER_MODULE(etm, simplebus, etm_driver, etm_devclass, 0, 0);
+MODULE_VERSION(etm, 1);
Index: sys/arm64/coresight/coresight-funnel.h
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-funnel.h
@@ -0,0 +1,66 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_ARM64_CORESIGHT_CORESIGHT_FUNNEL_H_
+#define	_ARM64_CORESIGHT_CORESIGHT_FUNNEL_H_
+
+/* CoreSight Trace Funnel (CSTF) register map. */
+#define	FUNNEL_FUNCTL		0x000 /* Funnel Control Register */
+#define	 FUNCTL_HOLDTIME_SHIFT	8
+#define	 FUNCTL_HOLDTIME_MASK	(0xf << FUNCTL_HOLDTIME_SHIFT)
+#define	FUNNEL_PRICTL		0x004 /* Priority Control Register */
+#define	FUNNEL_ITATBDATA0	0xEEC /* Integration Register, ITATBDATA0 */
+#define	FUNNEL_ITATBCTR2	0xEF0 /* Integration Register, ITATBCTR2 */
+#define	FUNNEL_ITATBCTR1	0xEF4 /* Integration Register, ITATBCTR1 */
+#define	FUNNEL_ITATBCTR0	0xEF8 /* Integration Register, ITATBCTR0 */
+#define	FUNNEL_IMCR		0xF00 /* Integration Mode Control Register */
+#define	FUNNEL_CTSR		0xFA0 /* Claim Tag Set Register */
+#define	FUNNEL_CTCR		0xFA4 /* Claim Tag Clear Register */
+#define	FUNNEL_LOCKACCESS	0xFB0 /* Lock Access */
+#define	FUNNEL_LOCKSTATUS	0xFB4 /* Lock Status */
+#define	FUNNEL_AUTHSTATUS	0xFB8 /* Authentication status */
+#define	FUNNEL_DEVICEID		0xFC8 /* Device ID */
+#define	FUNNEL_DEVICETYPE	0xFCC /* Device Type Identifier */
+#define	FUNNEL_PERIPH4		0xFD0 /* Peripheral ID4 */
+#define	FUNNEL_PERIPH5		0xFD4 /* Peripheral ID5 */
+#define	FUNNEL_PERIPH6		0xFD8 /* Peripheral ID6 */
+#define	FUNNEL_PERIPH7		0xFDC /* Peripheral ID7 */
+#define	FUNNEL_PERIPH0		0xFE0 /* Peripheral ID0 */
+#define	FUNNEL_PERIPH1		0xFE4 /* Peripheral ID1 */
+#define	FUNNEL_PERIPH2		0xFE8 /* Peripheral ID2 */
+#define	FUNNEL_PERIPH3		0xFEC /* Peripheral ID3 */
+#define	FUNNEL_COMP0		0xFF0 /* Component ID0 */
+#define	FUNNEL_COMP1		0xFF4 /* Component ID1 */
+#define	FUNNEL_COMP2		0xFF8 /* Component ID2 */
+#define	FUNNEL_COMP3		0xFFC /* Component ID3 */
+
+#endif /* !_ARM64_CORESIGHT_CORESIGHT_FUNNEL_H_ */
Index: sys/arm64/coresight/coresight-funnel.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-funnel.c
@@ -0,0 +1,164 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <arm64/coresight/coresight.h>
+#include <arm64/coresight/coresight-funnel.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include "coresight_if.h"
+
+/* Device-tree match table; ocd_data != 0 means "supported". */
+static struct ofw_compat_data compat_data[] = {
+	{ "arm,coresight-funnel",		1 },
+	{ NULL,					0 }
+};
+
+/* Per-instance software state. */
+struct funnel_softc {
+	struct resource			*res;	/* Funnel register block */
+	struct coresight_platform_data	*pdata;	/* Topology from device tree */
+};
+
+/* Single memory resource: the funnel programming interface. */
+static struct resource_spec funnel_spec[] = {
+	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
+	{ -1, 0 }
+};
+
+/*
+ * Coresight interface: enable the funnel input port this endpoint is
+ * wired to (one enable bit per port, selected by endp->reg).
+ */
+static int
+funnel_enable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct funnel_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	/* Program the hold time field to 7 and set the port enable bit. */
+	reg = bus_read_4(sc->res, FUNNEL_FUNCTL);
+	reg &= ~(FUNCTL_HOLDTIME_MASK);
+	reg |= (7 << FUNCTL_HOLDTIME_SHIFT);
+	reg |= (1 << endp->reg);
+	bus_write_4(sc->res, FUNNEL_FUNCTL, reg);
+
+	return (0);
+}
+
+/*
+ * Coresight interface: disable the funnel input port for this endpoint.
+ */
+static void
+funnel_disable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct funnel_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	/* Clear the enable bit for this port only. */
+	reg = bus_read_4(sc->res, FUNNEL_FUNCTL);
+	reg &= ~(1 << endp->reg);
+	bus_write_4(sc->res, FUNNEL_FUNCTL, reg);
+}
+
+/*
+ * Newbus probe: match an enabled device-tree node against compat_data.
+ */
+static int
+funnel_probe(device_t dev)
+{
+
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Coresight Funnel");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Newbus attach: map registers, register with the coresight framework
+ * and unlock the component so its control registers are writable.
+ */
+static int
+funnel_attach(device_t dev)
+{
+	struct coresight_desc desc;
+	struct funnel_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	if (bus_alloc_resources(dev, funnel_spec, &sc->res) != 0) {
+		device_printf(dev, "cannot allocate resources for device\n");
+		return (ENXIO);
+	}
+
+	sc->pdata = coresight_get_platform_data(dev);
+
+	desc.pdata = sc->pdata;
+	desc.dev = dev;
+	desc.dev_type = CORESIGHT_FUNNEL;
+	coresight_register(&desc);
+
+	/* Unlock Coresight */
+	bus_write_4(sc->res, CORESIGHT_LAR, CORESIGHT_UNLOCK);
+
+	wmb();
+
+	/* Tag the message with the device name, as the error path does. */
+	if (bootverbose)
+		device_printf(dev, "Device ID: %x\n",
+		    bus_read_4(sc->res, FUNNEL_DEVICEID));
+
+	return (0);
+}
+
+static device_method_t funnel_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		funnel_probe),
+	DEVMETHOD(device_attach,	funnel_attach),
+
+	/* Coresight interface */
+	DEVMETHOD(coresight_enable,	funnel_enable),
+	DEVMETHOD(coresight_disable,	funnel_disable),
+	DEVMETHOD_END
+};
+
+static driver_t funnel_driver = {
+	"funnel",
+	funnel_methods,
+	sizeof(struct funnel_softc),
+};
+
+static devclass_t funnel_devclass;
+
+/* Attach to simplebus in the regular driver pass. */
+DRIVER_MODULE(funnel, simplebus, funnel_driver, funnel_devclass, 0, 0);
+MODULE_VERSION(funnel, 1);
Index: sys/arm64/coresight/coresight-tmc.h
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-tmc.h
@@ -0,0 +1,117 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_CORESIGHT_CORESIGHT_TMC_H_
+#define	_ARM64_CORESIGHT_CORESIGHT_TMC_H_
+
+#define	TMC_RSZ		0x004 /* RAM Size Register */
+#define	TMC_STS		0x00C /* Status Register */
+#define	 STS_MEMERR	(1 << 5)
+#define	 STS_EMPTY	(1 << 4)
+#define	 STS_FTEMPTY	(1 << 3)
+#define	 STS_TMCREADY	(1 << 2)
+#define	 STS_TRIGGERED	(1 << 1)
+#define	 STS_FULL	(1 << 0)
+#define	TMC_RRD		0x010 /* RAM Read Data Register */
+#define	TMC_RRP		0x014 /* RAM Read Pointer Register */
+#define	TMC_RWP		0x018 /* RAM Write Pointer Register */
+#define	TMC_TRG		0x01C /* Trigger Counter Register */
+#define	TMC_CTL		0x020 /* Control Register */
+#define	 CTL_TRACECAPTEN	(1 << 0)	/* Controls trace capture. */
+#define	TMC_RWD		0x024 /* RAM Write Data Register */
+#define	TMC_MODE	0x028 /* Mode Register */
+#define	 MODE_HW_FIFO		2
+#define	 MODE_SW_FIFO		1
+#define	 MODE_CIRCULAR_BUFFER	0
+#define	TMC_LBUFLEVEL	0x02C /* Latched Buffer Fill Level */
+#define	TMC_CBUFLEVEL	0x030 /* Current Buffer Fill Level */
+#define	TMC_BUFWM	0x034 /* Buffer Level Water Mark */
+#define	TMC_RRPHI	0x038 /* RAM Read Pointer High Register */
+#define	TMC_RWPHI	0x03C /* RAM Write Pointer High Register */
+#define	TMC_AXICTL	0x110 /* AXI Control Register */
+#define	 AXICTL_WRBURSTLEN_S	8
+#define	 AXICTL_WRBURSTLEN_M	(0xf << AXICTL_WRBURSTLEN_S)
+#define	 AXICTL_WRBURSTLEN_16	(0xf << AXICTL_WRBURSTLEN_S)
+#define	 AXICTL_SG_MODE		(1 << 7)	/* Scatter Gather Mode */
+#define	 AXICTL_CACHE_CTRL_BIT3	(1 << 5)
+#define	 AXICTL_CACHE_CTRL_BIT2	(1 << 4)
+#define	 AXICTL_CACHE_CTRL_BIT1	(1 << 3)
+#define	 AXICTL_CACHE_CTRL_BIT0	(1 << 2)
+#define	 AXICTL_AXCACHE_OS	(0xf << 2)
+#define	 AXICTL_PROT_CTRL_BIT1	(1 << 1)
+#define	 AXICTL_PROT_CTRL_BIT0	(1 << 0)
+#define	TMC_DBALO	0x118 /* Data Buffer Address Low Register */
+#define	TMC_DBAHI	0x11C /* Data Buffer Address High Register */
+#define	TMC_FFSR	0x300 /* Formatter and Flush Status Register */
+#define	TMC_FFCR	0x304 /* Formatter and Flush Control Register */
+#define	 FFCR_EN_FMT		(1 << 0)
+#define	 FFCR_EN_TI		(1 << 1)
+#define	 FFCR_FON_FLIN		(1 << 4)
+#define	 FFCR_FON_TRIG_EVT	(1 << 5)
+#define	 FFCR_FLUSH_MAN		(1 << 6)
+#define	 FFCR_TRIGON_TRIGIN	(1 << 8)
+#define	TMC_PSCR	0x308 /* Periodic Synchronization Counter Register */
+#define	TMC_ITATBMDATA0	0xED0 /* Integration Test ATB Master Data Register 0 */
+#define	TMC_ITATBMCTR2	0xED4 /* Integration Test ATB Master Interface Control 2 Register */
+#define	TMC_ITATBMCTR1	0xED8 /* Integration Test ATB Master Control Register 1 */
+#define	TMC_ITATBMCTR0	0xEDC /* Integration Test ATB Master Interface Control 0 Register */
+#define	TMC_ITMISCOP0	0xEE0 /* Integration Test Miscellaneous Output Register 0 */
+#define	TMC_ITTRFLIN	0xEE8 /* Integration Test Trigger In and Flush In Register */
+#define	TMC_ITATBDATA0	0xEEC /* Integration Test ATB Data Register 0 */
+#define	TMC_ITATBCTR2	0xEF0 /* Integration Test ATB Control 2 Register */
+#define	TMC_ITATBCTR1	0xEF4 /* Integration Test ATB Control 1 Register */
+#define	TMC_ITATBCTR0	0xEF8 /* Integration Test ATB Control 0 Register */
+#define	TMC_ITCTRL	0xF00 /* Integration Mode Control Register */
+#define	TMC_CLAIMSET	0xFA0 /* Claim Tag Set Register */
+#define	TMC_CLAIMCLR	0xFA4 /* Claim Tag Clear Register */
+#define	TMC_LAR		0xFB0 /* Lock Access Register */
+#define	TMC_LSR		0xFB4 /* Lock Status Register */
+#define	TMC_AUTHSTATUS	0xFB8 /* Authentication Status Register */
+#define	TMC_DEVID	0xFC8 /* Device Configuration Register */
+#define	 DEVID_CONFIGTYPE_S	6
+#define	 DEVID_CONFIGTYPE_M	(0x3 << DEVID_CONFIGTYPE_S)
+#define	 DEVID_CONFIGTYPE_ETB	(0 << DEVID_CONFIGTYPE_S)
+#define	 DEVID_CONFIGTYPE_ETR	(1 << DEVID_CONFIGTYPE_S)
+#define	 DEVID_CONFIGTYPE_ETF	(2 << DEVID_CONFIGTYPE_S)
+#define	TMC_DEVTYPE	0xFCC /* Device Type Identifier Register */
+#define	TMC_PERIPHID4	0xFD0 /* Peripheral ID4 Register */
+#define	TMC_PERIPHID5	0xFD4 /* Peripheral ID5 Register */
+#define	TMC_PERIPHID6	0xFD8 /* Peripheral ID6 Register */
+#define	TMC_PERIPHID7	0xFDC /* Peripheral ID7 Register */
+#define	TMC_PERIPHID0	0xFE0 /* Peripheral ID0 Register */
+#define	TMC_PERIPHID1	0xFE4 /* Peripheral ID1 Register */
+#define	TMC_PERIPHID2	0xFE8 /* Peripheral ID2 Register */
+#define	TMC_PERIPHID3	0xFEC /* Peripheral ID3 Register */
+#define	TMC_COMPID0	0xFF0 /* Component ID0 Register */
+#define	TMC_COMPID1	0xFF4 /* Component ID1 Register */
+#define	TMC_COMPID2	0xFF8 /* Component ID2 Register */
+#define	TMC_COMPID3	0xFFC /* Component ID3 Register */
+
+#endif /* !_ARM64_CORESIGHT_CORESIGHT_TMC_H_ */
Index: sys/arm64/coresight/coresight-tmc.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight-tmc.c
@@ -0,0 +1,359 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <arm64/coresight/coresight.h>
+#include <arm64/coresight/coresight-tmc.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include "coresight_if.h"
+
+/* FDT compatible strings claimed by this driver. */
+static struct ofw_compat_data compat_data[] = {
+	{ "arm,coresight-tmc",			1 },
+	{ NULL,					0 }
+};
+
+/* Per-instance software state. */
+struct tmc_softc {
+	struct resource			*res;	/* Register window. */
+	device_t			dev;
+	uint64_t			cycle;	/* NOTE(review): unused here. */
+	struct coresight_platform_data	*pdata;
+	uint32_t			dev_type; /* CORESIGHT_ETR or _ETF. */
+};
+
+static struct resource_spec tmc_spec[] = {
+	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
+	{ -1, 0 }
+};
+
+/*
+ * Unlock the Coresight and TMC lock access registers so that later
+ * register writes take effect.  Always returns 0.
+ */
+static int
+tmc_unlock(struct tmc_softc *sc)
+{
+
+	/* Unlock Coresight */
+	bus_write_4(sc->res, CORESIGHT_LAR, CORESIGHT_UNLOCK);
+	wmb();
+
+	/* Unlock TMC */
+	bus_write_4(sc->res, TMC_LAR, CORESIGHT_UNLOCK);
+	wmb();
+
+	return (0);
+}
+
+/*
+ * Enable trace capture.  Returns -1 if capture is already running,
+ * 0 on success.
+ */
+static int
+tmc_start(device_t dev)
+{
+	struct tmc_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	if (bus_read_4(sc->res, TMC_CTL) & CTL_TRACECAPTEN)
+		return (-1);
+
+	/* Enable TMC */
+	bus_write_4(sc->res, TMC_CTL, CTL_TRACECAPTEN);
+	if ((bus_read_4(sc->res, TMC_CTL) & CTL_TRACECAPTEN) == 0)
+		panic("not enabled0\n");
+
+	/*
+	 * Wait until the TMC leaves the ready state, i.e. capture is
+	 * running.  STS_TMCREADY is bit 2, so the masked value is 0 or
+	 * 4 and the previous "== 1" comparison was never true, making
+	 * the poll loop a no-op.
+	 */
+	do {
+		reg = bus_read_4(sc->res, TMC_STS);
+	} while ((reg & STS_TMCREADY) != 0);
+
+	if ((bus_read_4(sc->res, TMC_CTL) & CTL_TRACECAPTEN) == 0)
+		panic("not enabled1\n");
+
+	return (0);
+}
+
+/*
+ * Disable trace capture and wait for the TMC to drain.  Always
+ * returns 0.
+ */
+static int
+tmc_stop(device_t dev)
+{
+	struct tmc_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	reg = bus_read_4(sc->res, TMC_CTL);
+	reg &= ~CTL_TRACECAPTEN;
+	bus_write_4(sc->res, TMC_CTL, reg);
+
+	/*
+	 * Poll until TMCReady asserts, meaning the TMC has stopped and
+	 * drained.  STS_TMCREADY is bit 2, so the previous "== 1" test
+	 * could never be true and the loop exited immediately without
+	 * waiting for the drain to complete.
+	 */
+	do {
+		reg = bus_read_4(sc->res, TMC_STS);
+	} while ((reg & STS_TMCREADY) == 0);
+
+	return (0);
+}
+
+/*
+ * One-time static configuration of an ETF (FIFO) instance: unlock,
+ * wait for ready, program HW-FIFO mode and formatter, then start.
+ */
+static int
+tmc_configure_etf(device_t dev)
+{
+	struct tmc_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	tmc_unlock(sc);
+
+	/* Wait until the TMC is ready to accept a new configuration. */
+	do {
+		reg = bus_read_4(sc->res, TMC_STS);
+	} while ((reg & STS_TMCREADY) == 0);
+
+	bus_write_4(sc->res, TMC_MODE, MODE_HW_FIFO);
+	bus_write_4(sc->res, TMC_FFCR, FFCR_EN_FMT | FFCR_EN_TI);
+	bus_write_4(sc->res, TMC_BUFWM, 0x800-1);
+
+	tmc_start(dev);
+
+	/*
+	 * Debug dump.  The last two reads previously passed CBUFLEVEL
+	 * then LBUFLEVEL, while the format string labels them the
+	 * other way around; the argument order is corrected to match.
+	 */
+	if (bootverbose)
+		printf("%s: STS %x, CTL %x, RSZ %x, RRP %x, RWP %x, "
+		    "LBUFLEVEL %x, CBUFLEVEL %x\n", __func__,
+		    bus_read_4(sc->res, TMC_STS),
+		    bus_read_4(sc->res, TMC_CTL),
+		    bus_read_4(sc->res, TMC_RSZ),
+		    bus_read_4(sc->res, TMC_RRP),
+		    bus_read_4(sc->res, TMC_RWP),
+		    bus_read_4(sc->res, TMC_LBUFLEVEL),
+		    bus_read_4(sc->res, TMC_CBUFLEVEL));
+
+	return (0);
+}
+
+/*
+ * Program an ETR instance to stream trace into the DRAM buffer
+ * described by event->etr (circular-buffer mode over AXI), then
+ * start capture.  Always returns 0.
+ */
+static int
+tmc_configure_etr(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct tmc_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	tmc_unlock(sc);
+	tmc_stop(dev);
+
+	/* Wait until the TMC is ready before reprogramming it. */
+	do {
+		reg = bus_read_4(sc->res, TMC_STS);
+	} while ((reg & STS_TMCREADY) == 0);
+
+	/* Configure TMC */
+	bus_write_4(sc->res, TMC_MODE, MODE_CIRCULAR_BUFFER);
+
+	reg = AXICTL_PROT_CTRL_BIT1;
+	reg |= AXICTL_WRBURSTLEN_16;
+
+	/*
+	 * SG operation is broken on DragonBoard 410c
+	 * reg |= AXICTL_SG_MODE;
+	 */
+
+	reg |= AXICTL_AXCACHE_OS;
+	bus_write_4(sc->res, TMC_AXICTL, reg);
+
+	/* Formatter: insert triggers and flush on trigger events. */
+	reg = FFCR_EN_FMT | FFCR_EN_TI | FFCR_FON_FLIN |
+	    FFCR_FON_TRIG_EVT | FFCR_TRIGON_TRIGIN;
+	bus_write_4(sc->res, TMC_FFCR, reg);
+
+	bus_write_4(sc->res, TMC_TRG, 8);
+
+	/* Buffer base address and size (RSZ is in 32-bit words). */
+	bus_write_4(sc->res, TMC_DBALO, event->etr.low);
+	bus_write_4(sc->res, TMC_DBAHI, event->etr.high);
+	bus_write_4(sc->res, TMC_RSZ, event->etr.bufsize / 4);
+
+	/* Start reading and writing from the buffer base. */
+	bus_write_4(sc->res, TMC_RRP, event->etr.low);
+	bus_write_4(sc->res, TMC_RWP, event->etr.low);
+
+	/* Clear the stale Full flag from any previous run. */
+	reg = bus_read_4(sc->res, TMC_STS);
+	reg &= ~STS_FULL;
+	bus_write_4(sc->res, TMC_STS, reg);
+
+	tmc_start(dev);
+
+	return (0);
+}
+
+/*
+ * Coresight enable method.  ETF instances were configured statically
+ * at attach time, so nothing is done for them.  For an ETR, configure
+ * and start capture once per event (subsequent calls are no-ops while
+ * event->etr.started is set).
+ */
+static int
+tmc_enable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct tmc_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	/* ETF configuration is static */
+	switch (sc->dev_type) {
+	case CORESIGHT_ETF:
+		return (0);
+	case CORESIGHT_ETR:
+		if (event->etr.started)
+			return (0);
+		tmc_unlock(sc);
+		tmc_stop(dev);
+		tmc_configure_etr(dev, endp, event);
+		tmc_start(dev);
+		event->etr.started = 1;
+		/* Previously fell through to default; make it explicit. */
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Coresight disable method: intentionally a no-op.
+ */
+static void
+tmc_disable(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+
+	/*
+	 * Can't restore the state: there is no way to specify a buffer
+	 * offset to continue operation from, so the TMC is left running.
+	 */
+}
+
+/*
+ * Coresight read method: report buffer progress to the caller by
+ * updating event->etr.offset, and bump event->etr.cycle when the
+ * circular buffer has wrapped (STS_FULL).  No-op for ETF.
+ */
+static int
+tmc_read(device_t dev, struct endpoint *endp,
+    struct coresight_event *event)
+{
+	struct tmc_softc *sc;
+	uint32_t cur_ptr;
+
+	sc = device_get_softc(dev);
+
+	/* ETF has no software-visible DRAM buffer to report. */
+	if (sc->dev_type == CORESIGHT_ETF)
+		return (0);
+
+	if (bus_read_4(sc->res, TMC_STS) & STS_FULL) {
+		/* Buffer wrapped: count the cycle and restart capture. */
+		event->etr.offset = 0;
+		event->etr.cycle++;
+		tmc_stop(dev);
+		tmc_start(dev);
+	} else {
+		/* Offset of the write pointer from the buffer base. */
+		cur_ptr = bus_read_4(sc->res, TMC_RWP);
+		event->etr.offset = (cur_ptr - event->etr.low);
+	}
+
+	return (0);
+}
+
+/*
+ * Newbus probe: match an enabled FDT node against compat_data.
+ */
+static int
+tmc_probe(device_t dev)
+{
+
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Coresight Trace Memory Controller (TMC)");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Newbus attach: map registers, read the DEVID configuration type,
+ * and register the instance as an ETR or ETF.  ETF instances are
+ * configured immediately; ETR setup is deferred to tmc_enable().
+ */
+static int
+tmc_attach(device_t dev)
+{
+	struct coresight_desc desc;
+	struct tmc_softc *sc;
+	uint32_t reg;
+
+	sc = device_get_softc(dev);
+
+	sc->dev = dev;
+
+	if (bus_alloc_resources(dev, tmc_spec, &sc->res) != 0) {
+		device_printf(dev, "cannot allocate resources for device\n");
+		return (ENXIO);
+	}
+
+	sc->pdata = coresight_get_platform_data(dev);
+
+	desc.pdata = sc->pdata;
+	desc.dev = dev;
+
+	reg = bus_read_4(sc->res, TMC_DEVID);
+	reg &= DEVID_CONFIGTYPE_M;
+	switch (reg) {
+	case DEVID_CONFIGTYPE_ETR:
+		desc.dev_type = CORESIGHT_ETR;
+		sc->dev_type = CORESIGHT_ETR;
+		coresight_register(&desc);
+		if (bootverbose)
+			device_printf(dev, "ETR configuration found\n");
+		break;
+	case DEVID_CONFIGTYPE_ETF:
+		desc.dev_type = CORESIGHT_ETF;
+		sc->dev_type = CORESIGHT_ETF;
+		coresight_register(&desc);
+		tmc_configure_etf(dev);
+		if (bootverbose)
+			device_printf(dev, "ETF configuration found\n");
+		break;
+	default:
+		/*
+		 * NOTE(review): ETB/unknown config types keep sc->res
+		 * allocated but never register; consider releasing the
+		 * resources here.
+		 */
+		break;
+	}
+
+	return (0);
+}
+
+static device_method_t tmc_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		tmc_probe),
+	DEVMETHOD(device_attach,	tmc_attach),
+
+	/* Coresight interface */
+	DEVMETHOD(coresight_enable,	tmc_enable),
+	DEVMETHOD(coresight_disable,	tmc_disable),
+	DEVMETHOD(coresight_read,	tmc_read),
+	DEVMETHOD_END
+};
+
+static driver_t tmc_driver = {
+	"tmc",
+	tmc_methods,
+	sizeof(struct tmc_softc),
+};
+
+static devclass_t tmc_devclass;
+
+/* Attach under simplebus (FDT enumerated). */
+DRIVER_MODULE(tmc, simplebus, tmc_driver, tmc_devclass, 0, 0);
+MODULE_VERSION(tmc, 1);
Index: sys/arm64/coresight/coresight.h
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight.h
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ARM64_CORESIGHT_CORESIGHT_H_
+#define _ARM64_CORESIGHT_CORESIGHT_H_
+
+#include <dev/ofw/openfirm.h>
+
+#define	CORESIGHT_ITCTRL	0xf00
+#define	CORESIGHT_CLAIMSET	0xfa0
+#define	CORESIGHT_CLAIMCLR	0xfa4
+#define	CORESIGHT_LAR		0xfb0
+#define	 CORESIGHT_UNLOCK	0xc5acce55
+#define	CORESIGHT_LSR		0xfb4
+#define	CORESIGHT_AUTHSTATUS	0xfb8
+#define	CORESIGHT_DEVID		0xfc8
+#define	CORESIGHT_DEVTYPE	0xfcc
+
+enum cs_dev_type {
+	CORESIGHT_ETMV4,
+	CORESIGHT_ETR,
+	CORESIGHT_ETF,
+	CORESIGHT_DYNAMIC_REPLICATOR,
+	CORESIGHT_FUNNEL,
+};
+
+struct coresight_device {
+	TAILQ_ENTRY(coresight_device) link;
+	device_t dev;
+	phandle_t node;
+	enum cs_dev_type dev_type;
+	struct coresight_platform_data *pdata;
+};
+
+struct endpoint {
+	TAILQ_ENTRY(endpoint) link;
+	phandle_t my_node;
+	phandle_t their_node;
+	phandle_t dev_node;
+	boolean_t slave;
+	int reg;
+	struct coresight_device *cs_dev;
+	LIST_ENTRY(endpoint) endplink;
+};
+
+struct coresight_platform_data {
+	int cpu;
+	int in_ports;
+	int out_ports;
+	struct mtx mtx_lock;
+	TAILQ_HEAD(endpoint_list, endpoint) endpoints;
+};
+
+struct coresight_desc {
+	struct coresight_platform_data *pdata;
+	device_t dev;
+	enum cs_dev_type dev_type;
+};
+
+TAILQ_HEAD(coresight_device_list, coresight_device);
+
+/*
+ * NOTE(review): "COMPRATOR" is a typo for "COMPARATOR"; the name is
+ * kept as-is since other files in this change may reference it.
+ */
+#define	ETM_N_COMPRATOR		16
+
+/* Runtime state of an ETR sink buffer. */
+struct etr_status {
+	boolean_t started;	/* Capture configured and running. */
+	uint32_t cycle;		/* Incremented each time the buffer wraps. */
+	uint32_t offset;	/* Current write offset within the buffer. */
+	uint32_t low;		/* Buffer base address, low 32 bits. */
+	uint32_t high;		/* Buffer base address, high 32 bits. */
+	uint32_t bufsize;	/* Buffer size in bytes. */
+};
+
+/* Per-event trace configuration shared between hwpmc and the drivers. */
+struct coresight_event {
+	LIST_HEAD(, endpoint) endplist;
+
+	/* Address comparators — presumably consumed by the ETM driver. */
+	uint64_t addr[ETM_N_COMPRATOR];
+	uint32_t naddr;
+	uint8_t excp_level;
+	enum cs_dev_type src;	/* Trace source type. */
+	enum cs_dev_type sink;	/* Trace sink type. */
+
+	struct etr_status etr;	/* Sink state when the sink is an ETR. */
+};
+
+struct etm_config {
+	uint64_t addr[ETM_N_COMPRATOR];
+	uint32_t naddr;
+	uint8_t excp_level;
+};
+
+struct coresight_platform_data * coresight_get_platform_data(device_t dev);
+struct endpoint * coresight_get_output_endpoint(struct coresight_platform_data *pdata);
+struct coresight_device * coresight_get_output_device(struct endpoint *endp, struct endpoint **);
+int coresight_register(struct coresight_desc *desc);
+int coresight_init_event(int cpu, struct coresight_event *event);
+void coresight_enable(int cpu, struct coresight_event *event);
+void coresight_disable(int cpu, struct coresight_event *event);
+void coresight_read(int cpu, struct coresight_event *event);
+
+#endif /* !_ARM64_CORESIGHT_CORESIGHT_H_ */
Index: sys/arm64/coresight/coresight.c
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight.c
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <arm64/coresight/coresight.h>
+
+MALLOC_DEFINE(M_CORESIGHT, "coresight", "ARM Coresight");
+static struct mtx cs_mtx;
+
+struct coresight_device_list cs_devs;
+
+/*
+ * Walk the "port"/"port@N" children of dev_node (optionally nested
+ * under a "ports" container), collecting each endpoint and its
+ * remote peer into pdata->endpoints.  Ports carrying "slave-mode"
+ * are counted as inputs, the rest as outputs.  Always returns 0.
+ */
+static int
+coresight_get_ports(phandle_t dev_node,
+    struct coresight_platform_data *pdata)
+{
+	phandle_t node, child;
+	pcell_t port_reg;
+	phandle_t xref;
+	char *name;
+	int ret;
+	phandle_t endpoint_child;
+	struct endpoint *endp;
+
+	child = ofw_bus_find_child(dev_node, "ports");
+	if (child)
+		node = child;
+	else
+		node = dev_node;
+
+	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
+		ret = OF_getprop_alloc(child, "name", sizeof(*name), (void **)&name);
+		if (ret == -1)
+			continue;
+
+		/*
+		 * Only process children named "port" or "port@N".  The
+		 * previous test OR-ed the raw strcasecmp() results, which
+		 * is non-zero for every possible name (a name cannot equal
+		 * both), and compared six bytes of the five-character
+		 * prefix "port@".
+		 */
+		if (strcasecmp(name, "port") == 0 ||
+		    strncasecmp(name, "port@", 5) == 0) {
+
+			port_reg = -1;
+			OF_getencprop(child, "reg", (void *)&port_reg, sizeof(port_reg));
+
+			endpoint_child = ofw_bus_find_child(child, "endpoint");
+			if (endpoint_child) {
+				if (OF_getencprop(endpoint_child, "remote-endpoint", &xref,
+				    sizeof(xref)) == -1) {
+					printf("failed\n");
+					free(name, M_OFWPROP);
+					continue;
+				}
+				endp = malloc(sizeof(struct endpoint), M_CORESIGHT,
+				    M_WAITOK | M_ZERO);
+				endp->my_node = endpoint_child;
+				endp->their_node = OF_node_from_xref(xref);
+				endp->dev_node = dev_node;
+				endp->reg = port_reg;
+				if (OF_getproplen(endpoint_child, "slave-mode") >= 0) {
+					pdata->in_ports++;
+					endp->slave = 1;
+				} else {
+					pdata->out_ports++;
+				}
+
+				mtx_lock(&pdata->mtx_lock);
+				TAILQ_INSERT_TAIL(&pdata->endpoints, endp, link);
+				mtx_unlock(&pdata->mtx_lock);
+			}
+		}
+
+		/* OF_getprop_alloc() memory was leaked before this fix. */
+		free(name, M_OFWPROP);
+	}
+
+	return (0);
+}
+
+/*
+ * Add a device described by 'desc' to the global coresight device
+ * list.  Called by the individual drivers at attach time.  Always
+ * returns 0; allocation sleeps (M_WAITOK).
+ */
+int
+coresight_register(struct coresight_desc *desc)
+{
+	struct coresight_device *cs_dev;
+
+	cs_dev = malloc(sizeof(struct coresight_device),
+	    M_CORESIGHT, M_WAITOK | M_ZERO);
+	cs_dev->dev = desc->dev;
+	cs_dev->node = ofw_bus_get_node(desc->dev);
+	cs_dev->pdata = desc->pdata;
+	cs_dev->dev_type = desc->dev_type;
+
+	mtx_lock(&cs_mtx);
+	TAILQ_INSERT_TAIL(&cs_devs, cs_dev, link);
+	mtx_unlock(&cs_mtx);
+
+	return (0);
+}
+
+/*
+ * Return the single output (non-slave) endpoint of a device, or
+ * NULL when the device does not have exactly one output port or no
+ * non-slave endpoint is found.
+ */
+struct endpoint *
+coresight_get_output_endpoint(struct coresight_platform_data *pdata)
+{
+	struct endpoint *ep;
+
+	if (pdata->out_ports != 1)
+		return (NULL);
+
+	for (ep = TAILQ_FIRST(&pdata->endpoints); ep != NULL;
+	    ep = TAILQ_NEXT(ep, link)) {
+		if (!ep->slave)
+			return (ep);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Find the registered device owning the endpoint that is the remote
+ * peer of 'endp'.  On success the peer endpoint is returned through
+ * 'out_endp'; returns NULL when no registered device matches.
+ */
+struct coresight_device *
+coresight_get_output_device(struct endpoint *endp, struct endpoint **out_endp)
+{
+	struct coresight_device *cs_dev;
+	struct endpoint *endp2;
+
+	TAILQ_FOREACH(cs_dev, &cs_devs, link) {
+		TAILQ_FOREACH(endp2, &cs_dev->pdata->endpoints, link) {
+			if (endp->their_node == endp2->my_node) {
+				*out_endp = endp2;
+				return (cs_dev);
+			}
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Resolve the "cpu" phandle of 'node' and store that CPU's "reg"
+ * property into pdata->cpu.  Returns 0 on success, -1 otherwise.
+ */
+static int
+coresight_get_cpu(phandle_t node,
+    struct coresight_platform_data *pdata)
+{
+	phandle_t cpu_node;
+	pcell_t xref;
+	pcell_t cpu_reg;
+
+	if (OF_getencprop(node, "cpu", &xref, sizeof(xref)) == -1)
+		return (-1);
+
+	cpu_node = OF_node_from_xref(xref);
+	if (OF_getencprop(cpu_node, "reg", (void *)&cpu_reg,
+	    sizeof(cpu_reg)) <= 0)
+		return (-1);
+
+	pdata->cpu = cpu_reg;
+
+	return (0);
+}
+
+/*
+ * Allocate and populate the platform data (CPU affinity and
+ * port/endpoint topology) for a device from its FDT node.  Never
+ * returns NULL: the allocation sleeps (M_WAITOK).
+ */
+struct coresight_platform_data *
+coresight_get_platform_data(device_t dev)
+{
+	struct coresight_platform_data *pdata;
+	phandle_t node;
+
+	node = ofw_bus_get_node(dev);
+
+	pdata = malloc(sizeof(struct coresight_platform_data),
+	    M_CORESIGHT, M_WAITOK | M_ZERO);
+	mtx_init(&pdata->mtx_lock, "Coresight Platform Data", NULL, MTX_DEF);
+	TAILQ_INIT(&pdata->endpoints);
+
+	/* A failure to find the "cpu" property leaves pdata->cpu at 0. */
+	coresight_get_cpu(node, pdata);
+	coresight_get_ports(node, pdata);
+
+	if (bootverbose)
+		printf("Total ports: in %d out %d\n",
+		    pdata->in_ports, pdata->out_ports);
+
+	return (pdata);
+}
+
+/* Initialize the global device list and its lock early at boot. */
+static void
+coresight_init(void)
+{
+
+	mtx_init(&cs_mtx, "ARM Coresight", NULL, MTX_DEF);
+	TAILQ_INIT(&cs_devs);
+}
+
+SYSINIT(coresight, SI_SUB_DRIVERS, SI_ORDER_FIRST, coresight_init, NULL);
Index: sys/arm64/coresight/coresight_if.m
===================================================================
--- /dev/null
+++ sys/arm64/coresight/coresight_if.m
@@ -0,0 +1,50 @@
+#-
+# Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <machine/bus.h>
+#include <arm64/coresight/coresight.h>
+
+INTERFACE coresight;
+
+METHOD int enable {
+	device_t dev;
+	struct endpoint *endp;
+	struct coresight_event *event;
+};
+
+METHOD void disable {
+	device_t dev;
+	struct endpoint *endp;
+	struct coresight_event *event;
+};
+
+METHOD int read {
+	device_t dev;
+	struct endpoint *endp;
+	struct coresight_event *event;
+};
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -1800,6 +1800,7 @@
 dev/hptiop/hptiop.c		optional hptiop scbus
 dev/hwpmc/hwpmc_logging.c	optional hwpmc
 dev/hwpmc/hwpmc_mod.c		optional hwpmc
+dev/hwpmc/hwpmc_vm.c		optional hwpmc
 dev/hwpmc/hwpmc_soft.c		optional hwpmc
 dev/ichiic/ig4_acpi.c		optional ig4 acpi iicbus
 dev/ichiic/ig4_iic.c		optional ig4 iicbus
Index: sys/dev/hwpmc/hwpmc_arm64.c
===================================================================
--- sys/dev/hwpmc/hwpmc_arm64.c
+++ sys/dev/hwpmc/hwpmc_arm64.c
@@ -480,6 +480,7 @@
 	struct pmc_mdep *pmc_mdep;
 	struct pmc_classdep *pcd;
 	int idcode;
+	int ncpus;
 	int reg;
 
 	reg = arm64_pmcr_read();
@@ -495,8 +496,8 @@
 	arm64_pcpu = malloc(sizeof(struct arm64_cpu *) * pmc_cpu_max(),
 		M_PMC, M_WAITOK | M_ZERO);
 
-	/* Just one class */
-	pmc_mdep = pmc_mdep_alloc(1);
+	/* CPU counters and ETM */
+	pmc_mdep = pmc_mdep_alloc(2);
 
 	switch (idcode) {
 	case PMCR_IDCODE_CORTEX_A57:
@@ -534,6 +535,9 @@
 
 	pmc_mdep->pmd_npmc   += arm64_npmcs;
 
+	ncpus = pmc_cpu_max();
+	pmc_coresight_initialize(pmc_mdep, ncpus);
+
 	return (pmc_mdep);
 }
 
@@ -541,4 +545,5 @@
 pmc_arm64_finalize(struct pmc_mdep *md)
 {
 
+	pmc_coresight_finalize(md);
 }
Index: sys/dev/hwpmc/hwpmc_core.c
===================================================================
--- sys/dev/hwpmc/hwpmc_core.c
+++ sys/dev/hwpmc/hwpmc_core.c
@@ -2287,6 +2287,7 @@
 		break;
 	case PMC_CPU_INTEL_SKYLAKE:
 	case PMC_CPU_INTEL_SKYLAKE_XEON:
+	case PMC_CPU_INTEL_KABYLAKE:
 	case PMC_CPU_INTEL_BROADWELL:
 	case PMC_CPU_INTEL_BROADWELL_XEON:
 	case PMC_CPU_INTEL_SANDYBRIDGE:
@@ -2325,6 +2326,7 @@
 		cpuflag = IAP_F_SLX;
 		break;
 	case PMC_CPU_INTEL_SKYLAKE:
+	case PMC_CPU_INTEL_KABYLAKE:
 		cpuflag = IAP_F_SL;
 		break;
 	case PMC_CPU_INTEL_BROADWELL_XEON:
@@ -2846,6 +2848,12 @@
 	struct core_cpu *cc;
 	pmc_value_t v;
 
+	error = pmc_pt_intr(cpu, tf);
+	if (error) {
+		/* Found */
+		return (1);
+	}
+
 	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
 	    TRAPF_USERMODE(tf));
 
Index: sys/dev/hwpmc/hwpmc_cs.h
===================================================================
--- /dev/null
+++ sys/dev/hwpmc/hwpmc_cs.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _DEV_HWPMC_CS_H_
+#define _DEV_HWPMC_CS_H_
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <vm/vm.h>
+
+#include <machine/frame.h>
+
+#define	CORESIGHT_NADDR	4	/* Max address-range pairs per allocation. */
+#define	CORESIGHT_NPMCS	1
+
+/* User-visible part of a PMC_OP_PMCALLOCATE request. */
+struct pmc_md_coresight_op_pmcallocate {
+	uint32_t		flags;
+	uint64_t		ranges[2 * CORESIGHT_NADDR];
+	int			nranges;
+};
+
+#ifdef	_KERNEL
+/* Per-CPU trace buffer backing storage. */
+struct coresight_buffer {
+	uint64_t		phys_base;
+	vm_object_t		obj;
+};
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_coresight_pmc {
+	struct coresight_buffer	coresight_buffers[MAXCPU];
+};
+
+/*
+ * Prototypes.
+ */
+
+int	pmc_coresight_initialize(struct pmc_mdep *_md, int _maxcpu);
+void	pmc_coresight_finalize(struct pmc_mdep *_md);
+int	pmc_coresight_intr(int cpu, struct trapframe *tf);
+
+#endif /* _KERNEL */
+#endif /* !_DEV_HWPMC_CS_H_ */
Index: sys/dev/hwpmc/hwpmc_cs.c
===================================================================
--- /dev/null
+++ sys/dev/hwpmc/hwpmc_cs.c
@@ -0,0 +1,714 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+#include <sys/pmckern.h>
+#include <sys/systm.h>
+#include <sys/ioccom.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/vmem.h>
+#include <sys/vmmeter.h>
+#include <sys/kthread.h>
+#include <sys/pmclog.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/pmap.h>
+
+#include <arm64/coresight/coresight.h>
+
+#include <dev/hwpmc/hwpmc_vm.h>
+
+static MALLOC_DEFINE(M_CORESIGHT, "coresight", "CORESIGHT driver");
+
+extern struct cdev *pmc_cdev[MAXCPU];
+
+/*
+ * ARM CORESIGHT support.
+ *
+ * Hardware limitations:
+ * - Scatter-gather operation is broken in hardware on the
+ *   Qualcomm Snapdragon 410e processor.
+ * - None of the coresight interconnect devices provides an interrupt line.
+ * - Circular-buffer is the only mode of operation for TMC(ETR).
+ *
+ * I.e. once the buffer is full, the operation is not halted;
+ * instead the buffer is overwritten from the start and no
+ * interrupt is delivered.
+ */
+
+#define	CORESIGHT_CAPS (PMC_CAP_READ | PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | PMC_CAP_USER)
+
+#define	PMC_CORESIGHT_DEBUG
+#undef	PMC_CORESIGHT_DEBUG
+
+#ifdef	PMC_CORESIGHT_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+struct coresight_descr {
+	struct pmc_descr pm_descr;  /* "base class" */
+};
+
+static struct coresight_descr coresight_pmcdesc[CORESIGHT_NPMCS] =
+{
+    {
+	.pm_descr =
+	{
+		.pd_name  = "CORESIGHT",
+		.pd_class = PMC_CLASS_CORESIGHT,
+		.pd_caps  = CORESIGHT_CAPS,
+		.pd_width = 64
+	}
+    }
+};
+
+/*
+ * Per-CPU data structure for CoreSight trace units.
+ */
+
+struct coresight_cpu {
+	struct pmc_hw			tc_hw;
+	uint32_t			l0_eax;
+	uint32_t			l0_ebx;
+	uint32_t			l0_ecx;
+	uint32_t			l1_eax;
+	uint32_t			l1_ebx;
+	struct pmc			*pm_mmap;
+	uint32_t			flags;
+#define	FLAG_CORESIGHT_ALLOCATED		(1 << 0)
+	struct coresight_event		event;
+};
+
+static struct coresight_cpu **coresight_pcpu;
+
+static int
+coresight_buffer_allocate(uint32_t cpu,
+    struct coresight_buffer *coresight_buf, uint32_t bufsize)
+{
+	struct pmc_vm_map *map;
+	struct coresight_cpu *coresight_pc;
+	uint64_t phys_base;
+	struct cdev_cpu *cc;
+	vm_object_t obj;
+	vm_page_t m;
+	int npages;
+
+	dprintf("%s\n", __func__);
+
+	coresight_pc = coresight_pcpu[cpu];
+
+	coresight_buf->obj = obj = vm_pager_allocate(OBJT_PHYS, 0, bufsize,
+	    PROT_READ, 0, curthread->td_ucred);
+
+	npages = bufsize / PAGE_SIZE;
+
+	VM_OBJECT_WLOCK(obj);
+	m = vm_page_alloc_contig(obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO,
+	    npages, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
+	if (m == NULL) {
+		VM_OBJECT_WUNLOCK(obj);
+		printf("%s: Can't allocate memory.\n", __func__);
+		vm_object_deallocate(obj);
+		return (-1);
+	}
+	phys_base = VM_PAGE_TO_PHYS(m);
+	for (; m != NULL; m = vm_page_next(m)) {
+		if ((m->flags & PG_ZERO) == 0)
+			pmap_zero_page(m);
+		m->valid = VM_PAGE_BITS_ALL;
+	}
+	VM_OBJECT_WUNLOCK(obj);
+
+	map = malloc(sizeof(struct pmc_vm_map), M_CORESIGHT, M_WAITOK | M_ZERO);
+	map->t = curthread;
+	map->obj = obj;
+	map->buf = (void *)coresight_buf;
+
+	cc = pmc_cdev[cpu]->si_drv1;
+
+	mtx_lock(&cc->vm_mtx);
+	TAILQ_INSERT_HEAD(&cc->pmc_maplist, map, map_next);
+	mtx_unlock(&cc->vm_mtx);
+
+	coresight_buf->phys_base = phys_base;
+
+	return (0);
+}
+
+static int
+coresight_buffer_deallocate(uint32_t cpu,
+    struct coresight_buffer *coresight_buf)
+{
+	struct pmc_vm_map *map, *map_tmp;
+	struct cdev_cpu *cc;
+
+	cc = pmc_cdev[cpu]->si_drv1;
+
+	dprintf("%s\n", __func__);
+
+	mtx_lock(&cc->vm_mtx);
+	TAILQ_FOREACH_SAFE(map, &cc->pmc_maplist, map_next, map_tmp) {
+		KASSERT(map->t == curthread,
+		    ("Deallocation must be done in the same "
+		    "thread as allocation"));
+		if (map->buf == (void *)coresight_buf) {
+			TAILQ_REMOVE(&cc->pmc_maplist, map, map_next);
+			free(map, M_CORESIGHT);
+			break;
+		}
+	}
+	mtx_unlock(&cc->vm_mtx);
+
+	return (0);
+}
+
+static int
+coresight_buffer_prepare(uint32_t cpu, struct pmc *pm,
+    const struct pmc_op_pmcallocate *a)
+{
+	const struct pmc_md_coresight_op_pmcallocate *pm_coresighta;
+	struct coresight_cpu *coresight_pc;
+	struct pmc_md_coresight_pmc *pm_coresight;
+	struct coresight_buffer *coresight_buf;
+	uint32_t bufsize;
+	enum pmc_mode mode;
+	uint32_t phys_lo;
+	uint32_t phys_hi;
+	int error;
+	struct coresight_event *event;
+
+	coresight_pc = coresight_pcpu[cpu];
+	event = &coresight_pc->event;
+
+	pm_coresighta = (const struct pmc_md_coresight_op_pmcallocate *)
+	    &a->pm_md.pm_coresight;
+	pm_coresight = (struct pmc_md_coresight_pmc *)&pm->pm_md;
+	coresight_buf = &pm_coresight->coresight_buffers[cpu];
+
+	bufsize = 16 * 1024 * 1024;
+	error = coresight_buffer_allocate(cpu, coresight_buf, bufsize);
+	if (error != 0) {
+		dprintf("%s: can't allocate buffers\n", __func__);
+		return (EINVAL);
+	}
+
+	phys_lo = coresight_buf->phys_base & 0xffffffff;
+	phys_hi = (coresight_buf->phys_base >> 32) & 0xffffffff;
+	event->naddr = 0;
+
+	event->etr.started = 0;
+	event->etr.low = phys_lo;
+	event->etr.high = phys_hi;
+
+	mode = PMC_TO_MODE(pm);
+	if (mode == PMC_MODE_ST)
+		event->excp_level = 1;
+	else if (mode == PMC_MODE_TT)
+		event->excp_level = 0;
+	else {
+		dprintf("%s: unsupported mode %d\n", __func__, mode);
+		return (-1);
+	}
+
+	event->src = CORESIGHT_ETMV4;
+	event->sink = CORESIGHT_ETR;
+
+	coresight_init_event(cpu, event);
+
+	return (0);
+}
+
+static int
+coresight_allocate_pmc(int cpu, int ri, struct pmc *pm,
+    const struct pmc_op_pmcallocate *a)
+{
+	struct coresight_cpu *coresight_pc;
+	int i;
+
+	coresight_pc = coresight_pcpu[cpu];
+
+	dprintf("%s: curthread %lx, cpu %d (curcpu %d)\n", __func__,
+	    (uint64_t)curthread, cpu, PCPU_GET(cpuid));
+	dprintf("%s: cpu %d (curcpu %d)\n", __func__,
+	    cpu, PCPU_GET(cpuid));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < CORESIGHT_NPMCS,
+	    ("[coresight,%d] illegal row index %d", __LINE__, ri));
+
+	if (a->pm_class != PMC_CLASS_CORESIGHT)
+		return (EINVAL);
+
+	if (a->pm_ev != PMC_EV_CORESIGHT_CORESIGHT)
+		return (EINVAL);
+
+	if ((pm->pm_caps & CORESIGHT_CAPS) == 0)
+		return (EINVAL);
+
+	if ((pm->pm_caps & ~CORESIGHT_CAPS) != 0)
+		return (EPERM);
+
+	if (a->pm_mode != PMC_MODE_ST &&
+	    a->pm_mode != PMC_MODE_TT)
+		return (EINVAL);
+
+	/* Only one ST-mode (system trace) PMC may be allocated per CPU. */
+	if (a->pm_mode == PMC_MODE_ST &&
+	    coresight_pc->flags & FLAG_CORESIGHT_ALLOCATED) {
+		dprintf("error: coresight is already allocated for CPU %d\n",
+		    cpu);
+		return (EUSERS);
+	}
+
+	if (a->pm_mode == PMC_MODE_TT)
+		for (i = 0; i < pmc_cpu_max(); i++) {
+			if (coresight_buffer_prepare(i, pm, a))
+				return (EINVAL);
+		}
+	else
+		if (coresight_buffer_prepare(cpu, pm, a))
+			return (EINVAL);
+
+	if (a->pm_mode == PMC_MODE_ST)
+		coresight_pc->flags |= FLAG_CORESIGHT_ALLOCATED;
+
+	return (0);
+}
+
+static int
+coresight_config_pmc(int cpu, int ri, struct pmc *pm)
+{
+	struct coresight_cpu *coresight_pc;
+	struct pmc_hw *phw;
+
+	dprintf("%s: cpu %d (pm %lx)\n", __func__, cpu, (uint64_t)pm);
+
+	PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	coresight_pc = coresight_pcpu[cpu];
+	phw = &coresight_pc->tc_hw;
+
+	KASSERT(pm == NULL || phw->phw_pmc == NULL,
+	    ("[coresight,%d] pm=%p phw->pm=%p hwpmc not unconfigured", __LINE__,
+	    pm, phw->phw_pmc));
+
+	phw->phw_pmc = pm;
+
+	return (0);
+}
+
+static int
+coresight_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
+{
+	const struct coresight_descr *pd;
+	struct pmc_hw *phw;
+	size_t copied;
+	int error;
+
+	dprintf("%s\n", __func__);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	phw = &coresight_pcpu[cpu]->tc_hw;
+	pd = &coresight_pmcdesc[ri];
+
+	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
+	    PMC_NAME_MAX, &copied)) != 0)
+		return (error);
+
+	pi->pm_class = pd->pm_descr.pd_class;
+
+	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
+		pi->pm_enabled = TRUE;
+		*ppmc          = phw->phw_pmc;
+	} else {
+		pi->pm_enabled = FALSE;
+		*ppmc          = NULL;
+	}
+
+	return (0);
+}
+
+static int
+coresight_get_config(int cpu, int ri, struct pmc **ppm)
+{
+	struct coresight_cpu *coresight_pc;
+	struct pmc_hw *phw;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	coresight_pc = coresight_pcpu[cpu];
+	phw = &coresight_pc->tc_hw;
+
+	*ppm = phw->phw_pmc;
+
+	return (0);
+}
+
+static int
+coresight_pcpu_init(struct pmc_mdep *md, int cpu)
+{
+	struct pmc_cpu *pc;
+	struct coresight_cpu *coresight_pc;
+	int ri;
+
+	dprintf("%s: cpu %d\n", __func__, cpu);
+
+	KASSERT(cpu == PCPU_GET(cpuid), ("Init on wrong CPU\n"));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal cpu %d", __LINE__, cpu));
+	KASSERT(coresight_pcpu, ("[coresight,%d] null pcpu", __LINE__));
+	KASSERT(coresight_pcpu[cpu] == NULL, ("[coresight,%d] non-null per-cpu",
+	    __LINE__));
+
+	coresight_pc = malloc(sizeof(struct coresight_cpu),
+	    M_CORESIGHT, M_WAITOK | M_ZERO);
+	coresight_pc->tc_hw.phw_state = PMC_PHW_FLAG_IS_ENABLED |
+	    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
+	    PMC_PHW_FLAG_IS_SHAREABLE;
+
+	coresight_pcpu[cpu] = coresight_pc;
+
+	ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_CORESIGHT].pcd_ri;
+
+	KASSERT(pmc_pcpu, ("[coresight,%d] null generic pcpu", __LINE__));
+
+	pc = pmc_pcpu[cpu];
+
+	KASSERT(pc, ("[coresight,%d] null generic per-cpu", __LINE__));
+
+	pc->pc_hwpmcs[ri] = &coresight_pc->tc_hw;
+
+	return (0);
+}
+
+static int
+coresight_pcpu_fini(struct pmc_mdep *md, int cpu)
+{
+	int ri;
+	struct pmc_cpu *pc;
+	struct coresight_cpu *coresight_pc;
+
+	dprintf("%s: cpu %d\n", __func__, cpu);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal cpu %d", __LINE__, cpu));
+	KASSERT(coresight_pcpu[cpu] != NULL, ("[coresight,%d] null pcpu",
+	    __LINE__));
+
+	coresight_pc = coresight_pcpu[cpu];
+
+	free(coresight_pcpu[cpu], M_CORESIGHT);
+	coresight_pcpu[cpu] = NULL;
+
+	ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_CORESIGHT].pcd_ri;
+
+	pc = pmc_pcpu[cpu];
+	pc->pc_hwpmcs[ri] = NULL;
+
+	return (0);
+}
+
+static int
+coresight_trace_config(int cpu, int ri, struct pmc *pm,
+    uint64_t *ranges, uint32_t nranges)
+{
+	struct coresight_event *event;
+	struct coresight_cpu *coresight_pc;
+	int i;
+
+	dprintf("%s\n", __func__);
+
+	coresight_pc = coresight_pcpu[cpu];
+	event = &coresight_pc->event;
+
+	KASSERT(cpu == PCPU_GET(cpuid), ("Configuring wrong CPU\n"));
+
+	for (i = 0; i < nranges * 2; i++)
+		event->addr[i] = ranges[i];
+
+	event->naddr = nranges;
+
+	enum pmc_mode mode;
+	mode = PMC_TO_MODE(pm);
+	if (mode == PMC_MODE_ST)
+		event->excp_level = 1;
+	else
+		event->excp_level = 0;
+
+	event->src = CORESIGHT_ETMV4;
+	event->sink = CORESIGHT_ETR;
+
+	return (0);
+}
+
+static int
+coresight_read_trace(int cpu, int ri, struct pmc *pm,
+    pmc_value_t *vcycle, pmc_value_t *voffset)
+{
+	struct pmc_md_coresight_pmc *pm_coresight;
+	struct coresight_event *event;
+	struct coresight_buffer *coresight_buf;
+	struct coresight_cpu *coresight_pc;
+	uint64_t offset;
+	uint64_t cycle;
+
+	dprintf("%s\n", __func__);
+
+	coresight_pc = coresight_pcpu[cpu];
+	coresight_pc->pm_mmap = pm;
+	event = &coresight_pc->event;
+
+	coresight_read(cpu, event);
+
+	cycle = event->etr.cycle;
+	offset = event->etr.offset;
+
+	pm_coresight = (struct pmc_md_coresight_pmc *)&pm->pm_md;
+	coresight_buf = &pm_coresight->coresight_buffers[cpu];
+
+	*vcycle = cycle;
+	*voffset = offset;
+
+	return (0);
+}
+
+static int
+coresight_read_pmc(int cpu, int ri, pmc_value_t *v)
+{
+
+	dprintf("%s\n", __func__);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal ri %d", __LINE__, ri));
+
+	*v = 0;
+
+	return (0);
+}
+
+static int
+coresight_release_pmc(int cpu, int ri, struct pmc *pm)
+{
+	struct pmc_md_coresight_pmc *pm_coresight;
+	struct coresight_event *event;
+	struct coresight_cpu *coresight_pc;
+	enum pmc_mode mode;
+	struct pmc_hw *phw;
+	int i;
+
+	pm_coresight = (struct pmc_md_coresight_pmc *)&pm->pm_md;
+	coresight_pc = coresight_pcpu[cpu];
+	event = &coresight_pc->event;
+
+	dprintf("%s: cpu %d (curcpu %d)\n", __func__, cpu, PCPU_GET(cpuid));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0,
+	    ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	phw = &coresight_pcpu[cpu]->tc_hw;
+	phw->phw_pmc = NULL;
+
+	KASSERT(phw->phw_pmc == NULL,
+	    ("[coresight,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc));
+
+	coresight_disable(cpu, event);
+
+	mode = PMC_TO_MODE(pm);
+	if (mode == PMC_MODE_TT)
+		for (i = 0; i < pmc_cpu_max(); i++)
+			coresight_buffer_deallocate(i,
+			    &pm_coresight->coresight_buffers[i]);
+	else
+		coresight_buffer_deallocate(cpu,
+		    &pm_coresight->coresight_buffers[cpu]);
+
+	if (mode == PMC_MODE_ST)
+		coresight_pc->flags &= ~FLAG_CORESIGHT_ALLOCATED;
+
+	return (0);
+}
+
+static int
+coresight_start_pmc(int cpu, int ri)
+{
+	struct coresight_event *event;
+	struct coresight_cpu *coresight_pc;
+	struct pmc_hw *phw;
+
+	dprintf("%s: cpu %d (curcpu %d)\n", __func__, cpu, PCPU_GET(cpuid));
+
+	coresight_pc = coresight_pcpu[cpu];
+	event = &coresight_pc->event;
+	phw = &coresight_pc->tc_hw;
+	if (phw == NULL || phw->phw_pmc == NULL)
+		return (-1);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	coresight_enable(cpu, event);
+
+	return (0);
+}
+
+static int
+coresight_stop_pmc(int cpu, int ri)
+{
+	struct coresight_event *event;
+	struct coresight_cpu *coresight_pc;
+
+	dprintf("%s\n", __func__);
+
+	coresight_pc = coresight_pcpu[cpu];
+	event = &coresight_pc->event;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	coresight_disable(cpu, event);
+
+	return (0);
+}
+
+static int
+coresight_write_pmc(int cpu, int ri, pmc_value_t v)
+{
+
+	dprintf("%s\n", __func__);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[coresight,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[coresight,%d] illegal row-index %d", __LINE__, ri));
+
+	return (0);
+}
+
+int
+pmc_coresight_initialize(struct pmc_mdep *md, int maxcpu)
+{
+	struct pmc_classdep *pcd;
+
+	dprintf("%s\n", __func__);
+
+	KASSERT(md != NULL, ("[coresight,%d] md is NULL", __LINE__));
+	KASSERT(md->pmd_nclass >= 1, ("[coresight,%d] dubious md->nclass %d",
+	    __LINE__, md->pmd_nclass));
+
+	coresight_pcpu = malloc(sizeof(struct coresight_cpu *) * maxcpu,
+	    M_CORESIGHT, M_WAITOK | M_ZERO);
+
+	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_CORESIGHT];
+
+	pcd->pcd_caps	= CORESIGHT_CAPS;
+	pcd->pcd_class	= PMC_CLASS_CORESIGHT;
+	pcd->pcd_num	= CORESIGHT_NPMCS;
+	pcd->pcd_ri	= md->pmd_npmc;
+	pcd->pcd_width	= 64;
+
+	pcd->pcd_allocate_pmc = coresight_allocate_pmc;
+	pcd->pcd_config_pmc   = coresight_config_pmc;
+	pcd->pcd_describe     = coresight_describe;
+	pcd->pcd_get_config   = coresight_get_config;
+	pcd->pcd_pcpu_init    = coresight_pcpu_init;
+	pcd->pcd_pcpu_fini    = coresight_pcpu_fini;
+	pcd->pcd_read_pmc     = coresight_read_pmc;
+	pcd->pcd_read_trace   = coresight_read_trace;
+	pcd->pcd_trace_config = coresight_trace_config;
+	pcd->pcd_release_pmc  = coresight_release_pmc;
+	pcd->pcd_start_pmc    = coresight_start_pmc;
+	pcd->pcd_stop_pmc     = coresight_stop_pmc;
+	pcd->pcd_write_pmc    = coresight_write_pmc;
+
+	md->pmd_npmc += CORESIGHT_NPMCS;
+
+	return (0);
+}
+
+void
+pmc_coresight_finalize(struct pmc_mdep *md)
+{
+
+	dprintf("%s\n", __func__);
+
+#ifdef INVARIANTS
+	int i, ncpus;
+
+	ncpus = pmc_cpu_max();
+	for (i = 0; i < ncpus; i++)
+		KASSERT(coresight_pcpu[i] == NULL,
+		    ("[coresight,%d] non-null pcpu cpu %d", __LINE__, i));
+
+	KASSERT(md->pmd_classdep[PMC_MDEP_CLASS_INDEX_CORESIGHT].pcd_class ==
+	    PMC_CLASS_CORESIGHT, ("[coresight,%d] class mismatch", __LINE__));
+#endif
+
+	free(coresight_pcpu, M_CORESIGHT);
+	coresight_pcpu = NULL;
+}
Index: sys/dev/hwpmc/hwpmc_intel.c
===================================================================
--- sys/dev/hwpmc/hwpmc_intel.c
+++ sys/dev/hwpmc/hwpmc_intel.c
@@ -181,12 +181,8 @@
 			cputype = PMC_CPU_INTEL_IVYBRIDGE_XEON;
 			nclasses = 3;
 			break;
-			/* Skylake */
 		case 0x4e:
 		case 0x5e:
-			/* Kabylake */
-		case 0x8E:	/* Per Intel document 325462-063US July 2017. */
-		case 0x9E:	/* Per Intel document 325462-063US July 2017. */
 			cputype = PMC_CPU_INTEL_SKYLAKE;
 			nclasses = 3;
 			break;
@@ -220,6 +216,11 @@
 			nclasses = 3;
 			break;
 		}
+		case 0x8E:	/* Per Intel document 325462-063US July 2017. */
+		case 0x9E:	/* Per Intel document 325462-063US July 2017. */
+			cputype = PMC_CPU_INTEL_KABYLAKE;
+			nclasses = 4;
+			break;
 		break;
 #if	defined(__i386__) || defined(__amd64__)
 	case 0xF00:		/* P4 */
@@ -237,7 +238,7 @@
 	/* Allocate base class and initialize machine dependent struct */
 	pmc_mdep = pmc_mdep_alloc(nclasses);
 
-	pmc_mdep->pmd_cputype	 = cputype;
+	pmc_mdep->pmd_cputype    = cputype;
 	pmc_mdep->pmd_switch_in	 = intel_switch_in;
 	pmc_mdep->pmd_switch_out = intel_switch_out;
 
@@ -256,6 +257,7 @@
 	case PMC_CPU_INTEL_BROADWELL_XEON:
 	case PMC_CPU_INTEL_SKYLAKE_XEON:
 	case PMC_CPU_INTEL_SKYLAKE:
+	case PMC_CPU_INTEL_KABYLAKE:
 	case PMC_CPU_INTEL_CORE:
 	case PMC_CPU_INTEL_CORE2:
 	case PMC_CPU_INTEL_CORE2EXTREME:
@@ -312,10 +314,10 @@
 		goto error;
 	}
 
+#if defined(__i386__) || defined(__amd64__)
 	/*
 	 * Init the uncore class.
 	 */
-#if	defined(__i386__) || defined(__amd64__)
 	switch (cputype) {
 		/*
 		 * Intel Corei7 and Westmere processors.
@@ -330,7 +332,19 @@
 	default:
 		break;
 	}
+
+	/*
+	 * Intel Processor Tracing (PT).
+	 */
+	if (cputype == PMC_CPU_INTEL_KABYLAKE) {
+		error = pmc_pt_initialize(pmc_mdep, ncpus);
+		if (error) {
+			pmc_pt_finalize(pmc_mdep);
+			goto error;
+		}
+	}
 #endif
+
   error:
 	if (error) {
 		pmc_mdep_free(pmc_mdep);
@@ -353,6 +367,7 @@
 	case PMC_CPU_INTEL_BROADWELL_XEON:
 	case PMC_CPU_INTEL_SKYLAKE_XEON:
 	case PMC_CPU_INTEL_SKYLAKE:
+	case PMC_CPU_INTEL_KABYLAKE:
 	case PMC_CPU_INTEL_CORE:
 	case PMC_CPU_INTEL_CORE2:
 	case PMC_CPU_INTEL_CORE2EXTREME:
@@ -389,10 +404,10 @@
 		KASSERT(0, ("[intel,%d] unknown CPU type", __LINE__));
 	}
 
+#if defined(__i386__) || defined(__amd64__)
 	/*
 	 * Uncore.
 	 */
-#if	defined(__i386__) || defined(__amd64__)
 	switch (md->pmd_cputype) {
 	case PMC_CPU_INTEL_BROADWELL:
 	case PMC_CPU_INTEL_COREI7:
@@ -404,5 +419,11 @@
 	default:
 		break;
 	}
+
+	/*
+	 * Intel Processor Tracing (PT).
+	 */
+	if (md->pmd_cputype == PMC_CPU_INTEL_KABYLAKE)
+		pmc_pt_finalize(md);
 #endif
 }
Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- sys/dev/hwpmc/hwpmc_mod.c
+++ sys/dev/hwpmc/hwpmc_mod.c
@@ -74,6 +74,7 @@
 #include <vm/vm_object.h>
 
 #include "hwpmc_soft.h"
+#include "hwpmc_vm.h"
 
 /*
  * Types
@@ -1295,6 +1296,8 @@
 			    pp->pp_pmcs[ri].pp_pmcval;
 			pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount;
 			mtx_pool_unlock_spin(pmc_mtxpool, pm);
+		} else if (PMC_TO_MODE(pm) == PMC_MODE_TT) {
+			/* Nothing */
 		} else {
 			KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
 			    ("[pmc,%d] illegal mode=%d", __LINE__,
@@ -1310,7 +1313,8 @@
 		pcd->pcd_write_pmc(cpu, adjri, newvalue);
 
 		/* If a sampling mode PMC, reset stalled state. */
-		if (PMC_TO_MODE(pm) == PMC_MODE_TS)
+		if (PMC_TO_MODE(pm) == PMC_MODE_TS ||
+		    PMC_TO_MODE(pm) == PMC_MODE_TT)
 			CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
 
 		/* Indicate that we desire this to run. */
@@ -1472,7 +1476,8 @@
 				    pp->pp_pmcs[ri].pp_pmcval,
 				    pm->pm_sc.pm_reloadcount));
 				mtx_pool_unlock_spin(pmc_mtxpool, pm);
-
+			} else if (mode == PMC_MODE_TT) {
+				/* Nothing */
 			} else {
 				tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
 
@@ -1528,6 +1533,10 @@
 	const struct pmc *pm;
 	struct pmc_owner *po;
 	const struct pmc_process *pp;
+	struct proc *p;
+	bool pause_thread;
+
+	sx_slock(&pmc_sx);
 
 	freepath = fullpath = NULL;
 	pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
@@ -1539,17 +1548,42 @@
 	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 		pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
 
-	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
+	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) {
+		sx_sunlock(&pmc_sx);
 		goto done;
+	}
+
+	p = td->td_proc;
+	if ((p->p_flag & P_HWPMC) == 0) {
+		sx_sunlock(&pmc_sx);
+		goto done;
+	}
+
+	pause_thread = 0;
 
 	/*
 	 * Inform sampling PMC owners tracking this process.
 	 */
-	for (ri = 0; ri < md->pmd_npmc; ri++)
-		if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
-		    PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+	for (ri = 0; ri < md->pmd_npmc; ri++) {
+		if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+			continue;
+		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+		    PMC_TO_MODE(pm) == PMC_MODE_TT)
 			pmclog_process_map_in(pm->pm_owner,
 			    pid, pkm->pm_address, fullpath);
+		if (PMC_TO_MODE(pm) == PMC_MODE_TT)
+			pause_thread = 1;
+	}
+
+	sx_sunlock(&pmc_sx);
+
+	if (pause_thread) {
+		PROC_LOCK(td->td_proc);
+		PROC_SLOCK(td->td_proc);
+		thread_suspend_switch(td, td->td_proc);
+		PROC_SUNLOCK(td->td_proc);
+		PROC_UNLOCK(td->td_proc);
+	}
 
   done:
 	if (freepath)
@@ -1580,11 +1614,14 @@
 	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 		return;
 
-	for (ri = 0; ri < md->pmd_npmc; ri++)
-		if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
-		    PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+	for (ri = 0; ri < md->pmd_npmc; ri++) {
+		if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+			continue;
+		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+		    PMC_TO_MODE(pm) == PMC_MODE_TT)
 			pmclog_process_map_out(pm->pm_owner, pid,
 			    pkm->pm_address, pkm->pm_address + pkm->pm_size);
+	}
 }
 
 /*
@@ -1598,7 +1635,8 @@
 	struct pmckern_map_in *km, *kmbase;
 
 	sx_assert(&pmc_sx, SX_LOCKED);
-	KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
+	KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+	    PMC_TO_MODE(pm) == PMC_MODE_ST,
 	    ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
 		__LINE__, (void *) pm));
 
@@ -1999,7 +2037,6 @@
 		break;
 
 	case PMC_FN_MMAP:
-		sx_assert(&pmc_sx, SX_LOCKED);
 		pmc_process_mmap(td, (struct pmckern_map_in *) arg);
 		break;
 
@@ -2115,8 +2152,8 @@
 
 	mtx_lock_spin(&pmc_processhash_mtx);
 	LIST_FOREACH(pp, pph, pp_next)
-	    if (pp->pp_proc == p)
-		    break;
+		if (pp->pp_proc == p)
+			break;
 
 	if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
 		LIST_REMOVE(pp, pp_next);
@@ -2652,7 +2689,8 @@
 	 * If this is a sampling mode PMC, log mapping information for
 	 * the kernel modules that are currently loaded.
 	 */
-	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+	    PMC_TO_MODE(pm) == PMC_MODE_ST)
 	    pmc_log_kernel_mappings(pm);
 
 	if (PMC_IS_VIRTUAL_MODE(mode)) {
@@ -3306,9 +3344,14 @@
 		mode = pa.pm_mode;
 		cpu  = pa.pm_cpu;
 
-		if ((mode != PMC_MODE_SS  &&  mode != PMC_MODE_SC  &&
-		     mode != PMC_MODE_TS  &&  mode != PMC_MODE_TC) ||
-		    (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
+		if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
+		    mode != PMC_MODE_SC && mode != PMC_MODE_TC &&
+		    mode != PMC_MODE_ST && mode != PMC_MODE_TT) {
+			error = EINVAL;
+			break;
+		}
+
+		if (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max()) {
 			error = EINVAL;
 			break;
 		}
@@ -3755,6 +3798,175 @@
 	}
 	break;
 
+	case PMC_OP_LOG_KERNEL_MAP:
+	{
+		struct pmc_op_simple sp;
+		struct pmc *pm;
+
+		if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
+			break;
+
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(sp.pm_pmcid, &pm)) != 0)
+			break;
+
+		if (PMC_TO_MODE(pm) != PMC_MODE_ST)
+			break;
+
+		if (pm->pm_state != PMC_STATE_ALLOCATED &&
+		    pm->pm_state != PMC_STATE_STOPPED &&
+		    pm->pm_state != PMC_STATE_RUNNING) {
+			error = EINVAL;
+			break;
+		}
+
+		pmc_log_kernel_mappings(pm);
+	}
+	break;
+
+	case PMC_OP_THREAD_UNSUSPEND:
+	{
+		struct pmc_op_proc_unsuspend u;
+		struct proc *p;
+		struct pmc *pm;
+
+		if ((error = copyin(arg, &u, sizeof(u))) != 0)
+			break;
+
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(u.pm_pmcid, &pm)) != 0)
+			break;
+
+		/* lookup pid */
+		if ((p = pfind(u.pm_pid)) == NULL) {
+			error = ESRCH;
+			break;
+		}
+
+		if ((p->p_flag & P_HWPMC) == 0)
+			break;
+
+		PROC_SLOCK(p);
+		thread_unsuspend(p);
+		PROC_SUNLOCK(p);
+		PROC_UNLOCK(p);
+	}
+	break;
+
+	case PMC_OP_TRACE_CONFIG:
+	{
+		struct pmc_op_trace_config trc;
+		uint64_t *ranges;
+		struct pmc *pm;
+		struct pmc_binding pb;
+		struct pmc_classdep *pcd;
+		uint32_t nranges;
+		uint32_t cpu;
+		uint32_t ri;
+		int adjri;
+
+		if ((error = copyin(arg, &trc, sizeof(trc))) != 0)
+			break;
+
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(trc.pm_pmcid, &pm)) != 0)
+			break;
+
+		if (PMC_TO_MODE(pm) != PMC_MODE_ST &&
+		    PMC_TO_MODE(pm) != PMC_MODE_TT)
+			break;
+
+		/* Can't proceed with PMC that hasn't been started. */
+		if (pm->pm_state != PMC_STATE_ALLOCATED &&
+		    pm->pm_state != PMC_STATE_STOPPED &&
+		    pm->pm_state != PMC_STATE_RUNNING) {
+			error = EINVAL;
+			break;
+		}
+
+		cpu = trc.pm_cpu;
+
+		ri = PMC_TO_ROWINDEX(pm);
+		pcd = pmc_ri_to_classdep(md, ri, &adjri);
+		if (pcd->pcd_trace_config == NULL)
+			break;
+
+		/* switch to CPU 'cpu' */
+		pmc_save_cpu_binding(&pb);
+		pmc_select_cpu(cpu);
+
+		ranges = trc.ranges;
+		nranges = trc.nranges;
+
+		mtx_pool_lock_spin(pmc_mtxpool, pm);
+		error = (*pcd->pcd_trace_config)(cpu, adjri,
+		    pm, ranges, nranges);
+		mtx_pool_unlock_spin(pmc_mtxpool, pm);
+
+		pmc_restore_cpu_binding(&pb);
+	}
+	break;
+
+	/*
+	 * Read a PMC trace buffer ptr.
+	 */
+	case PMC_OP_TRACE_READ:
+	{
+		struct pmc_op_trace_read trr;
+		struct pmc_op_trace_read *trr_ret;
+		struct pmc_binding pb;
+		struct pmc_classdep *pcd;
+		struct pmc *pm;
+		pmc_value_t cycle;
+		pmc_value_t offset;
+		uint32_t cpu;
+		uint32_t ri;
+		int adjri;
+
+		if ((error = copyin(arg, &trr, sizeof(trr))) != 0)
+			break;
+
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(trr.pm_pmcid, &pm)) != 0)
+			break;
+
+		if (PMC_TO_MODE(pm) != PMC_MODE_ST &&
+		    PMC_TO_MODE(pm) != PMC_MODE_TT)
+			break;
+
+		/* Can't read a PMC that hasn't been started. */
+		if (pm->pm_state != PMC_STATE_ALLOCATED &&
+		    pm->pm_state != PMC_STATE_STOPPED &&
+		    pm->pm_state != PMC_STATE_RUNNING) {
+			error = EINVAL;
+			break;
+		}
+
+		cpu = trr.pm_cpu;
+
+		ri = PMC_TO_ROWINDEX(pm);
+		pcd = pmc_ri_to_classdep(md, ri, &adjri);
+
+		/* switch to CPU 'cpu' */
+		pmc_save_cpu_binding(&pb);
+		pmc_select_cpu(cpu);
+
+		mtx_pool_lock_spin(pmc_mtxpool, pm);
+		error = (*pcd->pcd_read_trace)(cpu, adjri,
+		    pm, &cycle, &offset);
+		mtx_pool_unlock_spin(pmc_mtxpool, pm);
+
+		pmc_restore_cpu_binding(&pb);
+
+		trr_ret = (struct pmc_op_trace_read *)arg;
+		if ((error = copyout(&cycle, &trr_ret->pm_cycle,
+		    sizeof(trr.pm_cycle))))
+			break;
+		if ((error = copyout(&offset, &trr_ret->pm_offset,
+		    sizeof(trr.pm_offset))))
+			break;
+	}
+	break;
 
 	/*
 	 * Read and/or write a PMC.
@@ -3858,7 +4070,7 @@
 			/* save old value */
 			if (prw.pm_flags & PMC_F_OLDVALUE)
 				if ((error = (*pcd->pcd_read_pmc)(cpu, adjri,
-					 &oldvalue)))
+				    &oldvalue)))
 					goto error;
 			/* write out new value */
 			if (prw.pm_flags & PMC_F_NEWVALUE)
@@ -5029,6 +5241,8 @@
 		printf("\n");
 	}
 
+	pmc_vm_initialize(md);
+
 	return (error);
 }
 
@@ -5181,6 +5395,7 @@
 	}
 
 	pmclog_shutdown();
+	pmc_vm_finalize();
 
 	sx_xunlock(&pmc_sx); 	/* we are done */
 }
Index: sys/dev/hwpmc/hwpmc_pt.h
===================================================================
--- /dev/null
+++ sys/dev/hwpmc/hwpmc_pt.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _DEV_HWPMC_PT_H_
+#define _DEV_HWPMC_PT_H_
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <vm/vm.h>
+
+#include <machine/frame.h>
+
+#define	PT_CPUID	0x14
+#define	PT_NADDR	4
+#define	PT_NPMCS	1
+
+/* MD parameters passed from userland in pmc_op_pmcallocate for a PT PMC. */
+struct pmc_md_pt_op_pmcallocate {
+	uint32_t		flags;		/* INTEL_PT_FLAG_* packet options */
+#define	INTEL_PT_FLAG_BRANCHES	(1 << 0)
+#define	INTEL_PT_FLAG_TSC	(1 << 1)
+#define	INTEL_PT_FLAG_MTC	(1 << 2)
+#define	INTEL_PT_FLAG_DISRETC	(1 << 3)
+	uint64_t		ranges[2 * PT_NADDR];	/* IP filter [start,end) pairs */
+	int			nranges;	/* number of pairs in ranges[] */
+};
+
+#ifdef	_KERNEL
+/* Header portion of an XSAVES-format save area. */
+struct xsave_header {
+	uint64_t	xsave_bv;	/* state-component bitmap */
+	uint64_t	xcomp_bv;	/* compaction bitmap (bit 63 = compacted) */
+	uint8_t		reserved[48];
+};
+
+/* Intel PT state component as saved/restored by XSAVES/XRSTORS. */
+struct pt_ext_area {
+	uint64_t	rtit_ctl;
+	uint64_t	rtit_output_base;
+	uint64_t	rtit_output_mask_ptrs;
+	uint64_t	rtit_status;
+	uint64_t	rtit_cr3_match;
+	uint64_t	rtit_addr0_a;
+	uint64_t	rtit_addr0_b;
+	uint64_t	rtit_addr1_a;
+	uint64_t	rtit_addr1_b;
+};
+
+/*
+ * Full save area: legacy region, header, then the PT component in
+ * compacted layout.  64-byte alignment is required by XSAVES.
+ */
+struct pt_save_area {
+	uint8_t			legacy_state[512];
+	struct xsave_header	header;
+	struct pt_ext_area	pt_ext_area;
+} __aligned(64);
+
+/* Software shadow of one hardware ToPA table entry. */
+struct topa_entry {
+	uint64_t base;
+	uint64_t size;
+	uint64_t offset;	/* cumulative offset within the trace buffer */
+};
+
+/* Per-CPU trace buffer state for one PMC. */
+struct pt_buffer {
+	uint64_t		*topa_hw;	/* hardware ToPA table */
+	struct topa_entry	*topa_sw;	/* software shadow of topa_hw */
+	uint64_t		cycle;		/* number of buffer wraps */
+	vm_object_t		obj;		/* backing VM object (mmap'ed) */
+};
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_pt_pmc {
+	struct pt_buffer	pt_buffers[MAXCPU];
+};
+
+/*
+ * Prototypes.
+ */
+
+int	pmc_pt_initialize(struct pmc_mdep *_md, int _maxcpu);
+void	pmc_pt_finalize(struct pmc_mdep *_md);
+int	pmc_pt_intr(int cpu, struct trapframe *tf);
+
+#endif /* _KERNEL */
+#endif /* !_DEV_HWPMC_PT_H_ */
Index: sys/dev/hwpmc/hwpmc_pt.c
===================================================================
--- /dev/null
+++ sys/dev/hwpmc/hwpmc_pt.c
@@ -0,0 +1,952 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+#include <sys/pmckern.h>
+#include <sys/systm.h>
+#include <sys/ioccom.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/vmem.h>
+#include <sys/vmmeter.h>
+#include <sys/bus.h>
+#include <sys/kthread.h>
+#include <sys/pmclog.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/pmap.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/specialreg.h>
+
+#include <dev/hwpmc/hwpmc_vm.h>
+
+#include <x86/apicvar.h>
+#include <x86/x86_var.h>
+
+static MALLOC_DEFINE(M_PT, "pt", "PT driver");
+static uint64_t pt_xsave_mask;
+
+extern struct cdev *pmc_cdev[MAXCPU];
+
+/*
+ * Intel PT support.
+ */
+
+#define	PT_CAPS	(PMC_CAP_READ | PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | PMC_CAP_USER)
+
+#define	PMC_PT_DEBUG
+#undef	PMC_PT_DEBUG
+
+#ifdef	PMC_PT_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+/* Descriptor for the single PT "PMC" row exported by this class. */
+struct pt_descr {
+	struct pmc_descr pm_descr;  /* "base class" */
+};
+
+static struct pt_descr pt_pmcdesc[PT_NPMCS] =
+{
+    {
+	.pm_descr =
+	{
+		.pd_name  = "PT",
+		.pd_class = PMC_CLASS_PT,
+		.pd_caps  = PT_CAPS,
+		.pd_width = 64
+	}
+    }
+};
+
+/*
+ * Per-CPU data structure for PTs.
+ */
+
+struct pt_cpu {
+	struct pmc_hw			tc_hw;
+	uint32_t			l0_eax;		/* CPUID.(14H,0) capability words */
+	uint32_t			l0_ebx;
+	uint32_t			l0_ecx;
+	uint32_t			l1_eax;		/* CPUID.(14H,1) capability words */
+	uint32_t			l1_ebx;
+	struct pmc			*pm_mmap;	/* PMC last handed out via read_trace */
+	uint32_t			flags;
+#define	FLAG_PT_ALLOCATED		(1 << 0)	/* an ST-mode PMC owns this CPU */
+	struct pt_save_area		save_area;	/* XSAVES area holding PT MSR state */
+};
+
+static struct pt_cpu **pt_pcpu;
+
+/* Wrapper for the XRSTORS instruction (supervisor state restore). */
+static __inline void
+xrstors(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	/* EDX:EAX carry the requested-feature bitmap. */
+	low = mask;
+	hi = mask >> 32;
+	__asm __volatile("xrstors %0" : : "m" (*addr), "a" (low), "d" (hi));
+}
+
+/* Wrapper for the XSAVES instruction (supervisor state save). */
+static __inline void
+xsaves(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	/* EDX:EAX carry the requested-feature bitmap. */
+	low = mask;
+	hi = mask >> 32;
+	__asm __volatile("xsaves %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+	    "memory");
+}
+
+/*
+ * Save (save == true) or restore (save == false) the per-CPU PT MSR
+ * state via XSAVES/XRSTORS.  Saving disables tracing as a side effect;
+ * restoring re-enables it from the saved RTIT_CTL image.
+ */
+static void
+pt_save_restore(struct pt_cpu *pt_pc, bool save)
+{
+	uint64_t val;
+
+	clts();
+	/* Temporarily enable the PT component in XCR0/IA32_XSS. */
+	val = rxcr(XCR0);
+	load_xcr(XCR0, pt_xsave_mask);
+	wrmsr(MSR_IA32_XSS, XFEATURE_ENABLED_PT);
+	if (save) {
+		KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) != 0,
+		    ("%s: PT is disabled", __func__));
+		xsaves((char *)&pt_pc->save_area, XFEATURE_ENABLED_PT);
+	} else {
+		KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) == 0,
+		    ("%s: PT is enabled", __func__));
+		xrstors((char *)&pt_pc->save_area, XFEATURE_ENABLED_PT);
+	}
+	/* Restore the caller's XCR0 and re-set CR0.TS (clts() above). */
+	load_xcr(XCR0, val);
+	load_cr0(rcr0() | CR0_TS);
+}
+
+/*
+ * Program up to two IP filter ranges into the PT save area.  Only the
+ * ADDR0/ADDR1 register pairs are part of the XSAVES-managed state, so
+ * at most two of the caller's ranges are used even if the CPU reports
+ * support for more.  The switch falls through deliberately: enabling
+ * range 1 also enables range 0.
+ */
+static void
+pt_configure_ranges(struct pt_cpu *pt_pc, const uint64_t *ranges,
+    uint32_t nranges)
+{
+	struct pt_ext_area *pt_ext;
+	struct pt_save_area *save_area;
+	int nranges_supp;
+	int n;
+
+	save_area = &pt_pc->save_area;
+	pt_ext = &save_area->pt_ext_area;
+
+	if (pt_pc->l0_ebx & CPUPT_IPF) {
+		/* How many ranges CPU does support ? */
+		nranges_supp = (pt_pc->l1_eax & CPUPT_NADDR_M) >> CPUPT_NADDR_S;
+
+		/* xsave/xrstor supports two ranges only */
+		if (nranges_supp > 2)
+			nranges_supp = 2;
+
+		n = nranges > nranges_supp ? nranges_supp : nranges;
+
+		switch (n) {
+		case 2:
+			pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(1));
+			pt_ext->rtit_addr1_a = ranges[2];
+			pt_ext->rtit_addr1_b = ranges[3];
+			/* FALLTHROUGH */
+		case 1:
+			pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(0));
+			pt_ext->rtit_addr0_a = ranges[0];
+			pt_ext->rtit_addr0_b = ranges[1];
+			/* FALLTHROUGH */
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Allocate the trace buffer for one CPU: a 16MB OBJT_PHYS VM object,
+ * the hardware ToPA (Table of Physical Addresses) table pointing at its
+ * pages, and a software shadow table used to map hardware output
+ * pointers back to buffer offsets.  The object is registered on the
+ * per-CPU cdev map list so the owning thread can mmap it later.
+ * Returns 0 on success, -1 on page-allocation failure.
+ */
+static int
+pt_buffer_allocate(uint32_t cpu, struct pt_buffer *pt_buf)
+{
+	struct pmc_vm_map *map;
+	struct pt_cpu *pt_pc;
+	uint64_t topa_size;
+	uint64_t segsize;
+	uint64_t offset;
+	uint32_t size;
+	uint32_t bufsize;
+	struct cdev_cpu *cc;
+	vm_object_t obj;
+	vm_page_t m;
+	int npages;
+	int ntopa;
+	int req;
+	int i, j;
+
+	pt_pc = pt_pcpu[cpu];
+
+	bufsize = 16 * 1024 * 1024;
+
+	/*
+	 * With multi-entry ToPA support use many 4K segments; otherwise
+	 * fall back to a single 16M segment (plus the END entry).
+	 */
+	if (pt_pc->l0_ecx & CPUPT_TOPA_MULTI)
+		topa_size = TOPA_SIZE_4K;
+	else
+		topa_size = TOPA_SIZE_16M;
+
+	segsize = PAGE_SIZE << (topa_size >> TOPA_SIZE_S);
+	ntopa = bufsize / segsize;
+	npages = segsize / PAGE_SIZE;
+
+	pt_buf->obj = obj = vm_pager_allocate(OBJT_PHYS, 0, bufsize,
+	    PROT_READ, 0, curthread->td_ucred);
+
+	/* One 8-byte ToPA entry per segment, plus the trailing END entry. */
+	size = roundup2((ntopa + 1) * 8, PAGE_SIZE);
+	pt_buf->topa_hw = malloc(size, M_PT, M_WAITOK | M_ZERO);
+	pt_buf->topa_sw = malloc(ntopa * sizeof(struct topa_entry), M_PT,
+	    M_WAITOK | M_ZERO);
+
+	VM_OBJECT_WLOCK(obj);
+	vm_object_reference_locked(obj);
+	offset = 0;
+	for (i = 0; i < ntopa; i++) {
+		req = VM_ALLOC_NOBUSY | VM_ALLOC_ZERO;
+		if (npages == 1)
+			m = vm_page_alloc(obj, i, req);
+		else
+			m = vm_page_alloc_contig(obj, i, req, npages, 0, ~0,
+			    bufsize, 0, VM_MEMATTR_DEFAULT);
+		if (m == NULL) {
+			/*
+			 * NOTE(review): pages allocated on earlier
+			 * iterations are left in the object; presumably
+			 * vm_object_deallocate() frees them -- confirm.
+			 */
+			VM_OBJECT_WUNLOCK(obj);
+			printf("%s: Can't allocate memory.\n", __func__);
+			goto error;
+		}
+		for (j = 0; j < npages; j++)
+			m[j].valid = VM_PAGE_BITS_ALL;
+		pt_buf->topa_sw[i].size = segsize;
+		pt_buf->topa_sw[i].offset = offset;
+		pt_buf->topa_hw[i] = VM_PAGE_TO_PHYS(m) | topa_size;
+		/* Request an interrupt when the last segment fills up. */
+		if (i == (ntopa - 1))
+			pt_buf->topa_hw[i] |= TOPA_INT;
+
+		offset += segsize;
+	}
+	VM_OBJECT_WUNLOCK(obj);
+
+	/* The last entry is a pointer to the base table. */
+	pt_buf->topa_hw[ntopa] = vtophys(pt_buf->topa_hw) | TOPA_END;
+	pt_buf->cycle = 0;
+
+	/* Register the buffer so pmc_mmap_single() can find it by thread. */
+	map = malloc(sizeof(struct pmc_vm_map), M_PT, M_WAITOK | M_ZERO);
+	map->t = curthread;
+	map->obj = obj;
+	map->buf = pt_buf;
+
+	cc = pmc_cdev[cpu]->si_drv1;
+
+	mtx_lock(&cc->vm_mtx);
+	TAILQ_INSERT_HEAD(&cc->pmc_maplist, map, map_next);
+	mtx_unlock(&cc->vm_mtx);
+
+	return (0);
+
+error:
+	free(pt_buf->topa_hw, M_PT);
+	free(pt_buf->topa_sw, M_PT);
+	vm_object_deallocate(obj);
+
+	return (-1);
+}
+
+/*
+ * Release one CPU's trace buffer: remove its entry from the per-CPU
+ * cdev map list (if present), free both ToPA tables and drop the
+ * reference on the backing VM object.  Always returns 0.
+ */
+static int
+pt_buffer_deallocate(uint32_t cpu, struct pt_buffer *pt_buf)
+{
+	struct pmc_vm_map *map, *map_tmp;
+	struct cdev_cpu *cc;
+
+	cc = pmc_cdev[cpu]->si_drv1;
+
+	mtx_lock(&cc->vm_mtx);
+	TAILQ_FOREACH_SAFE(map, &cc->pmc_maplist, map_next, map_tmp) {
+		if (map->buf == pt_buf) {
+			TAILQ_REMOVE(&cc->pmc_maplist, map, map_next);
+			free(map, M_PT);
+			break;
+		}
+	}
+	mtx_unlock(&cc->vm_mtx);
+
+	free(pt_buf->topa_hw, M_PT);
+	free(pt_buf->topa_sw, M_PT);
+	vm_object_deallocate(pt_buf->obj);
+
+	return (0);
+}
+
+/*
+ * Allocate the trace buffer for 'cpu' and build the PT register image
+ * (RTIT_CTL, output base/mask, IP filters) in that CPU's save area,
+ * according to the userland allocation request 'a'.  The state takes
+ * effect when pt_start_pmc() restores the save area.
+ * Returns 0 on success or an error status.
+ */
+static int
+pt_buffer_prepare(uint32_t cpu, struct pmc *pm,
+    const struct pmc_op_pmcallocate *a)
+{
+	const struct pmc_md_pt_op_pmcallocate *pm_pta;
+	struct pt_cpu *pt_pc;
+	struct pmc_md_pt_pmc *pm_pt;
+	struct pt_buffer *pt_buf;
+	struct xsave_header *hdr;
+	struct pt_ext_area *pt_ext;
+	struct pt_save_area *save_area;
+	enum pmc_mode mode;
+	int error;
+
+	pt_pc = pt_pcpu[cpu];
+	if ((pt_pc->l0_ecx & CPUPT_TOPA) == 0)
+		return (ENXIO);	/* We rely on TOPA support */
+
+	pm_pta = (const struct pmc_md_pt_op_pmcallocate *)&a->pm_md.pm_pt;
+	pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+	pt_buf = &pm_pt->pt_buffers[cpu];
+
+	error = pt_buffer_allocate(cpu, pt_buf);
+	if (error != 0) {
+		dprintf("%s: can't allocate buffers\n", __func__);
+		return (EINVAL);
+	}
+
+	save_area = &pt_pc->save_area;
+	bzero(save_area, sizeof(struct pt_save_area));
+
+	hdr = &save_area->header;
+	hdr->xsave_bv = XFEATURE_ENABLED_PT;
+	hdr->xcomp_bv = XFEATURE_ENABLED_PT | (1ULL << 63) /* compaction */;
+
+	pt_ext = &save_area->pt_ext_area;
+
+	pt_ext->rtit_ctl = RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+	pt_ext->rtit_output_base = (uint64_t)vtophys(pt_buf->topa_hw);
+	pt_ext->rtit_output_mask_ptrs = 0x7f;
+
+	pt_configure_ranges(pt_pc, pm_pta->ranges, pm_pta->nranges);
+
+	/*
+	 * TODO
+	 * if (sc->l0_ebx & CPUPT_PRW) {
+	 *     reg |= RTIT_CTL_FUPONPTW;
+	 *     reg |= RTIT_CTL_PTWEN;
+	 * }
+	 */
+
+	mode = PMC_TO_MODE(pm);
+	if (mode == PMC_MODE_ST)
+		pt_ext->rtit_ctl |= RTIT_CTL_OS;
+	else if (mode == PMC_MODE_TT)
+		pt_ext->rtit_ctl |= RTIT_CTL_USER;
+	else {
+		/*
+		 * NOTE(review): this path returns -1 rather than an errno
+		 * and leaks the buffer allocated above.  The caller
+		 * validates the mode before calling, so it should be
+		 * unreachable -- confirm before relying on that.
+		 */
+		dprintf("%s: unsupported mode %d\n", __func__, mode);
+		return (-1);
+	}
+
+	/* Enable FUP, TIP, TIP.PGE, TIP.PGD, TNT, MODE.Exec and MODE.TSX packets */
+	if (pm_pta->flags & INTEL_PT_FLAG_BRANCHES)
+		pt_ext->rtit_ctl |= RTIT_CTL_BRANCHEN;
+
+	if (pm_pta->flags & INTEL_PT_FLAG_TSC)
+		pt_ext->rtit_ctl |= RTIT_CTL_TSCEN;
+
+	if ((pt_pc->l0_ebx & CPUPT_MTC) &&
+	    (pm_pta->flags & INTEL_PT_FLAG_MTC))
+		pt_ext->rtit_ctl |= RTIT_CTL_MTCEN;
+
+	if (pm_pta->flags & INTEL_PT_FLAG_DISRETC)
+		pt_ext->rtit_ctl |= RTIT_CTL_DISRETC;
+
+	/*
+	 * TODO: specify MTC frequency
+	 * Note: Check Bitmap of supported MTC Period Encodings
+	 * pt_ext->rtit_ctl |= RTIT_CTL_MTC_FREQ(6);
+	 */
+
+	return (0);
+}
+
+/*
+ * Validate an allocation request for a PT "PMC" and prepare its trace
+ * buffers.  Process-virtual (TT) mode prepares a buffer on every CPU;
+ * system (ST) mode prepares one on the target CPU only, and at most
+ * one ST allocation per CPU is permitted.  On a partial per-CPU
+ * preparation failure all buffers prepared so far are released.
+ */
+static int
+pt_allocate_pmc(int cpu, int ri, struct pmc *pm,
+    const struct pmc_op_pmcallocate *a)
+{
+	struct pmc_md_pt_pmc *pm_pt;
+	struct pt_cpu *pt_pc;
+	int i;
+
+	if ((cpu_stdext_feature & CPUID_STDEXT_PROCTRACE) == 0)
+		return (ENXIO);
+
+	pt_pc = pt_pcpu[cpu];
+	pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+
+	dprintf("%s: curthread %lx, cpu %d (curcpu %d)\n", __func__,
+	    (uint64_t)curthread, cpu, PCPU_GET(cpuid));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < PT_NPMCS,
+	    ("[pt,%d] illegal row index %d", __LINE__, ri));
+
+	if (a->pm_class != PMC_CLASS_PT)
+		return (EINVAL);
+
+	if (a->pm_ev != PMC_EV_PT_PT)
+		return (EINVAL);
+
+	if ((pm->pm_caps & PT_CAPS) == 0)
+		return (EINVAL);
+
+	if ((pm->pm_caps & ~PT_CAPS) != 0)
+		return (EPERM);
+
+	if (a->pm_mode != PMC_MODE_ST &&
+	    a->pm_mode != PMC_MODE_TT)
+		return (EINVAL);
+
+	/* Can't allocate multiple ST */
+	if (a->pm_mode == PMC_MODE_ST &&
+	    (pt_pc->flags & FLAG_PT_ALLOCATED) != 0) {
+		dprintf("error: pt is already allocated for CPU %d\n", cpu);
+		return (EUSERS);
+	}
+
+	if (a->pm_mode == PMC_MODE_TT) {
+		for (i = 0; i < pmc_cpu_max(); i++) {
+			if (pt_buffer_prepare(i, pm, a) != 0) {
+				/* Unwind the buffers prepared so far. */
+				while (--i >= 0)
+					pt_buffer_deallocate(i,
+					    &pm_pt->pt_buffers[i]);
+				return (EINVAL);
+			}
+		}
+	} else {
+		if (pt_buffer_prepare(cpu, pm, a) != 0)
+			return (EINVAL);
+	}
+
+	if (a->pm_mode == PMC_MODE_ST)
+		pt_pc->flags |= FLAG_PT_ALLOCATED;
+
+	return (0);
+}
+
+/*
+ * ToPA "buffer full" interrupt handler.  Accounts one buffer wrap for
+ * the PMC configured on this CPU and re-enables the LAPIC PMC line.
+ * Returns 1 if the interrupt was consumed, 0 otherwise.
+ */
+int
+pmc_pt_intr(int cpu, struct trapframe *tf)
+{
+	struct pmc_md_pt_pmc *pm_pt;
+	struct pt_buffer *pt_buf;
+	struct pt_cpu *pt_pc;
+	struct pmc *pm;
+
+	if (pt_pcpu == NULL)
+		return (0);
+
+	pt_pc = pt_pcpu[cpu];
+	if (pt_pc == NULL)
+		return (0);
+
+	/*
+	 * tc_hw is embedded in pt_pc, so its address is never NULL;
+	 * only the configured PMC pointer needs checking.
+	 */
+	pm = pt_pc->tc_hw.phw_pmc;
+	if (pm == NULL)
+		return (0);
+
+	pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+	pt_buf = &pm_pt->pt_buffers[cpu];
+
+	atomic_add_long(&pt_buf->cycle, 1);
+
+	lapic_reenable_pmc();
+
+	return (1);
+}
+
+/*
+ * Attach (pm != NULL) or detach (pm == NULL) a PMC to this CPU's
+ * hardware descriptor.  Always returns 0.
+ */
+static int
+pt_config_pmc(int cpu, int ri, struct pmc *pm)
+{
+	struct pt_cpu *pt_pc;
+	struct pmc_hw *phw;
+
+	dprintf("%s: cpu %d (pm %lx)\n", __func__, cpu, (uint64_t)pm);
+
+	PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	pt_pc = pt_pcpu[cpu];
+	phw = &pt_pc->tc_hw;
+
+	/* Either attaching to a free slot, or detaching. */
+	KASSERT(pm == NULL || phw->phw_pmc == NULL,
+	    ("[pt,%d] pm=%p phw->pm=%p hwpmc not unconfigured", __LINE__,
+	    pm, phw->phw_pmc));
+
+	phw->phw_pmc = pm;
+
+	return (0);
+}
+
+/*
+ * Fill 'pi' with a description of row 'ri' on 'cpu' and report the
+ * PMC configured there (if enabled) via 'ppmc'.
+ */
+static int
+pt_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
+{
+	const struct pt_descr *pd;
+	struct pmc_hw *phw;
+	size_t copied;
+	int error;
+
+	dprintf("%s\n", __func__);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	phw = &pt_pcpu[cpu]->tc_hw;
+	pd  = &pt_pmcdesc[ri];
+
+	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
+	    PMC_NAME_MAX, &copied)) != 0)
+		return (error);
+
+	pi->pm_class = pd->pm_descr.pd_class;
+
+	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
+		pi->pm_enabled = TRUE;
+		*ppmc          = phw->phw_pmc;
+	} else {
+		pi->pm_enabled = FALSE;
+		*ppmc          = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * Report the PMC currently configured on this CPU/row (NULL if none).
+ */
+static int
+pt_get_config(int cpu, int ri, struct pmc **ppm)
+{
+
+	dprintf("%s\n", __func__);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	*ppm = pt_pcpu[cpu]->tc_hw.phw_pmc;
+
+	return (0);
+}
+
+/*
+ * Query the PT CPUID leaf (0x14), sub-leaves 0 and 1, and cache the
+ * capability words in the per-CPU state for later feature tests.
+ */
+static void
+pt_enumerate(struct pt_cpu *pt_pc)
+{
+	u_int cp[4];
+
+	dprintf("Enumerating part 1\n");
+
+	cpuid_count(PT_CPUID, 0, cp);
+	dprintf("%s: Maximum valid sub-leaf Index: %x\n", __func__, cp[0]);
+	dprintf("%s: ebx %x\n", __func__, cp[1]);
+	dprintf("%s: ecx %x\n", __func__, cp[2]);
+
+	pt_pc->l0_eax = cp[0];
+	pt_pc->l0_ebx = cp[1];
+	pt_pc->l0_ecx = cp[2];
+
+	dprintf("Enumerating part 2\n");
+
+	cpuid_count(PT_CPUID, 1, cp);
+	dprintf("%s: eax %x\n", __func__, cp[0]);
+	dprintf("%s: ebx %x\n", __func__, cp[1]);
+
+	pt_pc->l1_eax = cp[0];
+	pt_pc->l1_ebx = cp[1];
+}
+
+/*
+ * Per-CPU initialization: verify the XSAVE/XSAVES feature set needed
+ * by the driver, enable CR4.XSAVE on this CPU, allocate the per-CPU
+ * state and register the hardware descriptor with the generic layer.
+ * Runs on the CPU being initialized (asserted below).
+ */
+static int
+pt_pcpu_init(struct pmc_mdep *md, int cpu)
+{
+	struct pmc_cpu *pc;
+	struct pt_cpu *pt_pc;
+	u_int cp[4];
+	int ri;
+
+	dprintf("%s: cpu %d\n", __func__, cpu);
+
+	/* We rely on XSAVE support */
+	if ((cpu_feature2 & CPUID2_XSAVE) == 0) {
+		printf("Intel PT: XSAVE is not supported\n");
+		return (ENXIO);
+	}
+
+	/* X87/SSE components must be supported (pt_xsave_mask). */
+	cpuid_count(0xd, 0x0, cp);
+	if ((cp[0] & pt_xsave_mask) != pt_xsave_mask) {
+		printf("Intel PT: CPU0 does not support X87 or SSE: %x", cp[0]);
+		return (ENXIO);
+	}
+
+	/* CPUID.(0DH,1):EAX bit 0 = XSAVEC/compaction, bit 3 = XSAVES. */
+	cpuid_count(0xd, 0x1, cp);
+	if ((cp[0] & (1 << 0)) == 0) {
+		printf("Intel PT: XSAVE compaction is not supported\n");
+		return (ENXIO);
+	}
+
+	if ((cp[0] & (1 << 3)) == 0) {
+		printf("Intel PT: XSAVES/XRSTORS are not supported\n");
+		return (ENXIO);
+	}
+
+	/* Enable XSAVE */
+	load_cr4(rcr4() | CR4_XSAVE);
+
+	KASSERT(cpu == PCPU_GET(cpuid), ("Init on wrong CPU\n"));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal cpu %d", __LINE__, cpu));
+	KASSERT(pt_pcpu, ("[pt,%d] null pcpu", __LINE__));
+	KASSERT(pt_pcpu[cpu] == NULL, ("[pt,%d] non-null per-cpu",
+	    __LINE__));
+
+	pt_pc = malloc(sizeof(struct pt_cpu), M_PT, M_WAITOK | M_ZERO);
+
+	pt_pc->tc_hw.phw_state = PMC_PHW_FLAG_IS_ENABLED |
+	    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
+	    PMC_PHW_FLAG_IS_SHAREABLE;
+
+	pt_pcpu[cpu] = pt_pc;
+
+	ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT].pcd_ri;
+
+	KASSERT(pmc_pcpu, ("[pt,%d] null generic pcpu", __LINE__));
+
+	pc = pmc_pcpu[cpu];
+
+	KASSERT(pc, ("[pt,%d] null generic per-cpu", __LINE__));
+
+	pc->pc_hwpmcs[ri] = &pt_pc->tc_hw;
+
+	pt_enumerate(pt_pc);
+
+	return (0);
+}
+
+/*
+ * Per-CPU teardown: detach the hardware descriptor from the generic
+ * per-CPU state before freeing it, so no stale pointer to freed
+ * memory is left visible at any point.
+ */
+static int
+pt_pcpu_fini(struct pmc_mdep *md, int cpu)
+{
+	int ri;
+	struct pmc_cpu *pc;
+	struct pt_cpu *pt_pc;
+
+	dprintf("%s: cpu %d\n", __func__, cpu);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal cpu %d", __LINE__, cpu));
+	KASSERT(pt_pcpu[cpu] != NULL, ("[pt,%d] null pcpu", __LINE__));
+
+	pt_pc = pt_pcpu[cpu];
+	pt_pcpu[cpu] = NULL;
+
+	ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT].pcd_ri;
+
+	pc = pmc_pcpu[cpu];
+	pc->pc_hwpmcs[ri] = NULL;
+
+	free(pt_pc, M_PT);
+
+	return (0);
+}
+
+/*
+ * Update the IP filter ranges of an already-configured PMC.  If
+ * tracing is currently enabled the state is first saved (which stops
+ * tracing) so the save-area image can be modified consistently.
+ */
+static int
+pt_trace_config(int cpu, int ri, struct pmc *pm,
+    uint64_t *ranges, uint32_t nranges)
+{
+	struct pt_cpu *pt_pc;
+	uint64_t reg;
+
+	dprintf("%s\n", __func__);
+
+	pt_pc = pt_pcpu[cpu];
+
+	KASSERT(cpu == PCPU_GET(cpuid), ("Configuring wrong CPU\n"));
+
+	/* Ensure tracing is turned off */
+	reg = rdmsr(MSR_IA32_RTIT_CTL);
+	if (reg & RTIT_CTL_TRACEEN)
+		pt_save_restore(pt_pc, true);
+
+	pt_configure_ranges(pt_pc, ranges, nranges);
+
+	return (0);
+}
+
+/*
+ * Report the current trace write position for 'pm' on 'cpu': the wrap
+ * count ('cycle') and the byte offset into the mmap'ed buffer.  When
+ * tracing is live the position is read from the OUTPUT_MASK_PTRS MSR,
+ * otherwise from the saved register image.
+ */
+static int
+pt_read_trace(int cpu, int ri, struct pmc *pm,
+    pmc_value_t *cycle, pmc_value_t *voffset)
+{
+	struct pt_ext_area *pt_ext;
+	struct pt_save_area *save_area;
+	struct pmc_md_pt_pmc *pm_pt;
+	struct pt_buffer *pt_buf;
+	struct pt_cpu *pt_pc;
+	uint64_t offset;
+	uint64_t reg;
+	uint32_t idx;
+
+	pt_pc = pt_pcpu[cpu];
+	/* Remember the PMC so a subsequent mmap can be matched to it. */
+	pt_pc->pm_mmap = pm;
+
+	pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+	pt_buf = &pm_pt->pt_buffers[cpu];
+
+	save_area = &pt_pc->save_area;
+	pt_ext = &save_area->pt_ext_area;
+
+	reg = rdmsr(MSR_IA32_RTIT_CTL);
+	if (reg & RTIT_CTL_TRACEEN)
+		reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS);
+	else
+		reg = pt_ext->rtit_output_mask_ptrs;
+
+	/*
+	 * Low word bits 31:7 hold the ToPA table entry index; the high
+	 * word is the output offset within the current region
+	 * (presumably -- see the SDM's OUTPUT_MASK_PTRS layout).
+	 */
+	idx = (reg & 0xffffffff) >> 7;
+	*cycle = pt_buf->cycle;
+
+	offset = reg >> 32;
+	*voffset = pt_buf->topa_sw[idx].offset + offset;
+
+	dprintf("%s: %lx\n", __func__, rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+	dprintf("%s: cycle %ld offset %ld\n", __func__, pt_buf->cycle, offset);
+
+	return (0);
+}
+
+/*
+ * PT exposes no counter register; reads always yield zero.
+ */
+static int
+pt_read_pmc(int cpu, int ri, pmc_value_t *v)
+{
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal ri %d", __LINE__, ri));
+
+	*v = 0;
+
+	return (0);
+}
+
+/*
+ * Release a PMC: detach it from the hardware descriptor and free its
+ * trace buffers (all CPUs for TT mode, the target CPU for ST mode).
+ */
+static int
+pt_release_pmc(int cpu, int ri, struct pmc *pm)
+{
+	struct pmc_md_pt_pmc *pm_pt;
+	struct pt_cpu *pt_pc;
+	enum pmc_mode mode;
+	struct pmc_hw *phw;
+	int i;
+
+	pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+	pt_pc = pt_pcpu[cpu];
+
+	dprintf("%s: cpu %d (curcpu %d)\n", __func__, cpu, PCPU_GET(cpuid));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0,
+	    ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	phw = &pt_pcpu[cpu]->tc_hw;
+	phw->phw_pmc = NULL;
+
+	/*
+	 * NOTE(review): this assertion is vacuous -- phw_pmc was just
+	 * cleared above; it was presumably meant to run before the store.
+	 */
+	KASSERT(phw->phw_pmc == NULL,
+	    ("[pt,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc));
+
+	dprintf("%s: cpu %d, output base %lx\n",
+	    __func__, cpu, rdmsr(MSR_IA32_RTIT_OUTPUT_BASE));
+	dprintf("%s: cpu %d, output base ptr %lx\n",
+	    __func__, cpu, rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+
+	mode = PMC_TO_MODE(pm);
+	if (mode == PMC_MODE_TT)
+		for (i = 0; i < pmc_cpu_max(); i++)
+			pt_buffer_deallocate(i, &pm_pt->pt_buffers[i]);
+	else
+		pt_buffer_deallocate(cpu, &pm_pt->pt_buffers[cpu]);
+
+	/* Allow a new ST allocation on this CPU again. */
+	if (mode == PMC_MODE_ST)
+		pt_pc->flags &= ~FLAG_PT_ALLOCATED;
+
+	return (0);
+}
+
+/*
+ * Start tracing on 'cpu' by restoring the prepared PT register image
+ * (which has RTIT_CTL.TraceEn set).  Returns -1 if no PMC is
+ * configured on this CPU.
+ */
+static int
+pt_start_pmc(int cpu, int ri)
+{
+	struct pt_cpu *pt_pc;
+	struct pmc_hw *phw;
+
+	dprintf("%s: cpu %d (curcpu %d)\n", __func__, cpu, PCPU_GET(cpuid));
+
+	pt_pc = pt_pcpu[cpu];
+	phw = &pt_pc->tc_hw;
+	if (phw == NULL || phw->phw_pmc == NULL)
+		return (-1);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	/* Restore the save area -- this enables tracing. */
+	pt_save_restore(pt_pc, false);
+
+	return (0);
+}
+
+/*
+ * Stop tracing on 'cpu' by saving the PT state back into the per-CPU
+ * save area (XSAVES disables tracing as a side effect).
+ */
+static int
+pt_stop_pmc(int cpu, int ri)
+{
+	struct pt_cpu *pt_pc;
+
+	pt_pc = pt_pcpu[cpu];
+
+	dprintf("%s: cpu %d, output base %lx, ptr %lx\n", __func__, cpu,
+	    rdmsr(MSR_IA32_RTIT_OUTPUT_BASE),
+	    rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	/*
+	 * Save the PT state to memory.
+	 * This operation will disable tracing.
+	 */
+	pt_save_restore(pt_pc, true);
+
+	return (0);
+}
+
+/*
+ * Writing a PT "PMC" value is a no-op; only the arguments are checked.
+ */
+static int
+pt_write_pmc(int cpu, int ri, pmc_value_t v)
+{
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+	return (0);
+}
+
+/*
+ * Class initialization: allocate the per-CPU pointer array and fill in
+ * the PT classdep entry (capabilities and method table) in 'md'.
+ * Always returns 0.
+ */
+int
+pmc_pt_initialize(struct pmc_mdep *md, int maxcpu)
+{
+	struct pmc_classdep *pcd;
+
+	dprintf("%s\n", __func__);
+
+	/* Components required in XCR0 while switching PT state. */
+	pt_xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+
+	KASSERT(md != NULL, ("[pt,%d] md is NULL", __LINE__));
+	KASSERT(md->pmd_nclass >= 1, ("[pt,%d] dubious md->nclass %d",
+	    __LINE__, md->pmd_nclass));
+
+	pt_pcpu = malloc(sizeof(struct pt_cpu *) * maxcpu, M_PT,
+	    M_WAITOK | M_ZERO);
+
+	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT];
+
+	pcd->pcd_caps	= PT_CAPS;
+	pcd->pcd_class	= PMC_CLASS_PT;
+	pcd->pcd_num	= PT_NPMCS;
+	pcd->pcd_ri	= md->pmd_npmc;
+	pcd->pcd_width	= 64;
+
+	pcd->pcd_allocate_pmc = pt_allocate_pmc;
+	pcd->pcd_config_pmc   = pt_config_pmc;
+	pcd->pcd_describe     = pt_describe;
+	pcd->pcd_get_config   = pt_get_config;
+	pcd->pcd_pcpu_init    = pt_pcpu_init;
+	pcd->pcd_pcpu_fini    = pt_pcpu_fini;
+	pcd->pcd_read_pmc     = pt_read_pmc;
+	pcd->pcd_read_trace   = pt_read_trace;
+	pcd->pcd_trace_config = pt_trace_config;
+	pcd->pcd_release_pmc  = pt_release_pmc;
+	pcd->pcd_start_pmc    = pt_start_pmc;
+	pcd->pcd_stop_pmc     = pt_stop_pmc;
+	pcd->pcd_write_pmc    = pt_write_pmc;
+
+	md->pmd_npmc += PT_NPMCS;
+
+	return (0);
+}
+
+/*
+ * Class teardown: verify (under INVARIANTS) that all per-CPU state has
+ * been released by pt_pcpu_fini(), then free the pointer array.
+ */
+void
+pmc_pt_finalize(struct pmc_mdep *md)
+{
+#ifdef INVARIANTS
+	int i, ncpus;
+#endif
+
+	dprintf("%s\n", __func__);
+
+#ifdef INVARIANTS
+	ncpus = pmc_cpu_max();
+	for (i = 0; i < ncpus; i++)
+		KASSERT(pt_pcpu[i] == NULL, ("[pt,%d] non-null pcpu cpu %d",
+		    __LINE__, i));
+
+	KASSERT(md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT].pcd_class ==
+	    PMC_CLASS_PT, ("[pt,%d] class mismatch", __LINE__));
+#endif
+
+	free(pt_pcpu, M_PT);
+	pt_pcpu = NULL;
+}
Index: sys/dev/hwpmc/hwpmc_vm.h
===================================================================
--- /dev/null
+++ sys/dev/hwpmc/hwpmc_vm.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DEV_HWPMC_VM_H_
+#define _DEV_HWPMC_VM_H_
+
+/* Create/destroy the per-CPU /dev/pmc%d trace-mmap devices. */
+int pmc_vm_initialize(struct pmc_mdep *md);
+int pmc_vm_finalize(void);
+
+/* One registered mmap-able trace buffer, owned by thread 't'. */
+struct pmc_vm_map {
+	TAILQ_ENTRY(pmc_vm_map)		map_next;
+	struct thread			*t;	/* owner; matched in mmap_single */
+	vm_object_t			obj;	/* VM object handed to mmap */
+	void *				buf;	/* MD buffer (e.g. struct pt_buffer) */
+};
+
+/* Per-CPU character device softc, hung off cdev->si_drv1. */
+struct cdev_cpu {
+	struct pmc_mdep			*md;
+	struct mtx			vm_mtx;	/* protects pmc_maplist */
+	TAILQ_HEAD(, pmc_vm_map)	pmc_maplist;
+	uint32_t			cpu;
+};
+
+#endif /* !_DEV_HWPMC_VM_H_ */
Index: sys/dev/hwpmc/hwpmc_vm.c
===================================================================
--- /dev/null
+++ sys/dev/hwpmc/hwpmc_vm.c
@@ -0,0 +1,134 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmckern.h>
+#include <sys/conf.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#define	PMC_VM_DEBUG
+#undef	PMC_VM_DEBUG
+
+#ifdef	PMC_VM_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+#include "hwpmc_vm.h"
+
+struct cdev *pmc_cdev[MAXCPU];
+
+/*
+ * d_mmap_single handler: hand out the trace-buffer VM object that was
+ * registered for the calling thread.  Only read-only mappings at
+ * offset 0 are permitted.
+ */
+static int
+pmc_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
+    vm_size_t mapsize, struct vm_object **objp, int nprot)
+{
+	struct pmc_vm_map *map;
+	struct cdev_cpu *cc;
+
+	cc = cdev->si_drv1;
+
+	if (nprot != PROT_READ || *offset != 0)
+		return (ENXIO);
+
+	mtx_lock(&cc->vm_mtx);
+	/* No entries are removed here, so the _SAFE variant is unneeded. */
+	TAILQ_FOREACH(map, &cc->pmc_maplist, map_next) {
+		if (map->t == curthread) {
+			mtx_unlock(&cc->vm_mtx);
+			*objp = map->obj;
+			return (0);
+		}
+	}
+	mtx_unlock(&cc->vm_mtx);
+
+	return (ENXIO);
+}
+
+/* Character device methods for the per-CPU /dev/pmc%d nodes. */
+static struct cdevsw pmc_cdevsw = {
+	.d_version =		D_VERSION,
+	.d_mmap_single =	pmc_mmap_single,
+	.d_name =		"HWPMC",
+};
+
+/*
+ * Create one /dev/pmc%d node per CPU, each with a softc carrying the
+ * mutex-protected list of mmap-able trace buffers.  Always returns 0.
+ */
+int
+pmc_vm_initialize(struct pmc_mdep *md)
+{
+	unsigned int maxcpu;
+	struct cdev_cpu *cc;
+	int cpu;
+
+	maxcpu = pmc_cpu_max();
+
+	for (cpu = 0; cpu < maxcpu; cpu++) {
+		cc = malloc(sizeof(struct cdev_cpu), M_PMC, M_WAITOK | M_ZERO);
+		cc->cpu = cpu;
+		cc->md = md;
+		mtx_init(&cc->vm_mtx, "PMC VM", NULL, MTX_DEF);
+		TAILQ_INIT(&cc->pmc_maplist);
+
+		/*
+		 * NOTE(review): 0666 makes the node world-accessible;
+		 * confirm that is intended for trace data.
+		 */
+		pmc_cdev[cpu] = make_dev(&pmc_cdevsw, 0, UID_ROOT, GID_WHEEL,
+		    0666, "pmc%d", cpu);
+		pmc_cdev[cpu]->si_drv1 = cc;
+	}
+
+	return (0);
+}
+
+/*
+ * Destroy the per-CPU /dev/pmc%d nodes.  The device is destroyed
+ * before its softc is freed so that no concurrent open/mmap path can
+ * observe a dangling si_drv1 pointer.  Always returns 0.
+ */
+int
+pmc_vm_finalize(void)
+{
+	unsigned int maxcpu;
+	struct cdev_cpu *cc;
+	int cpu;
+
+	maxcpu = pmc_cpu_max();
+
+	for (cpu = 0; cpu < maxcpu; cpu++) {
+		cc = pmc_cdev[cpu]->si_drv1;
+		destroy_dev(pmc_cdev[cpu]);
+		pmc_cdev[cpu] = NULL;
+		mtx_destroy(&cc->vm_mtx);
+		free(cc, M_PMC);
+	}
+
+	return (0);
+}
Index: sys/dev/hwpmc/pmc_events.h
===================================================================
--- sys/dev/hwpmc/pmc_events.h
+++ sys/dev/hwpmc/pmc_events.h
@@ -4843,6 +4843,20 @@
 #define	PMC_EV_TSC_FIRST	PMC_EV_TSC_TSC
 #define	PMC_EV_TSC_LAST		PMC_EV_TSC_TSC
 
+/* Intel PT */
+#define	__PMC_EV_PT()							\
+	__PMC_EV(PT, PT)
+
+#define	PMC_EV_PT_FIRST	PMC_EV_PT_PT
+#define	PMC_EV_PT_LAST	PMC_EV_PT_PT
+
+/* ARM CORESIGHT */
+#define	__PMC_EV_CORESIGHT()							\
+	__PMC_EV(CORESIGHT, CORESIGHT)
+
+#define	PMC_EV_CORESIGHT_FIRST	PMC_EV_CORESIGHT_CORESIGHT
+#define	PMC_EV_CORESIGHT_LAST		PMC_EV_CORESIGHT_CORESIGHT
+
 /*
  * Software events are dynamically defined.
  */
@@ -7141,6 +7155,7 @@
  * START	#EVENTS		DESCRIPTION
  * 0		0x1000		Reserved
  * 0x1000	0x0001		TSC
+ * 0x1100	0x0001		PT
  * 0x2000	0x0080		AMD K7 events
  * 0x2080	0x0100		AMD K8 events
  * 0x10000	0x0080		INTEL architectural fixed-function events
@@ -7157,11 +7172,14 @@
  * 0x13300	0x00FF		Freescale e500 events
  * 0x14000	0x0100		ARMv7 events
  * 0x14100	0x0100		ARMv8 events
+ * 0x14200	0x0001		ARM Coresight
  * 0x20000	0x1000		Software events
  */
 #define	__PMC_EVENTS()				\
 	__PMC_EV_BLOCK(TSC,	0x01000)	\
 	__PMC_EV_TSC()				\
+	__PMC_EV_BLOCK(PT,	0x1100)		\
+	__PMC_EV_PT()				\
 	__PMC_EV_BLOCK(K7,	0x2000)		\
 	__PMC_EV_K7()				\
 	__PMC_EV_BLOCK(K8,	0x2080)		\
@@ -7197,7 +7215,9 @@
 	__PMC_EV_BLOCK(ARMV7,	0x14000)	\
 	__PMC_EV_ARMV7()			\
 	__PMC_EV_BLOCK(ARMV8,	0x14100)	\
-	__PMC_EV_ARMV8()
+	__PMC_EV_ARMV8()			\
+	__PMC_EV_BLOCK(CORESIGHT, 0x14200)	\
+	__PMC_EV_CORESIGHT()
 
 #define	PMC_EVENT_FIRST	PMC_EV_TSC_TSC
 #define	PMC_EVENT_LAST	PMC_EV_SOFT_LAST
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -2489,7 +2489,7 @@
 		if ((prot & VM_PROT_EXECUTE) != 0 && error == 0) {
 			pkm.pm_file = vp;
 			pkm.pm_address = (uintptr_t) *addr;
-			PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
+			PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_MMAP, (void *) &pkm);
 		}
 	}
 #endif
Index: sys/modules/hwpmc/Makefile
===================================================================
--- sys/modules/hwpmc/Makefile
+++ sys/modules/hwpmc/Makefile
@@ -6,14 +6,16 @@
 
 KMOD=	hwpmc
 
-SRCS=	hwpmc_mod.c hwpmc_logging.c hwpmc_soft.c vnode_if.h
+SRCS=	hwpmc_mod.c hwpmc_logging.c hwpmc_soft.c hwpmc_vm.c vnode_if.h
 
 .if ${MACHINE_CPUARCH} == "aarch64"
 SRCS+=  hwpmc_arm64.c hwpmc_arm64_md.c
+SRCS+=  hwpmc_cs.c
 .endif
 
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+=	hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_piv.c hwpmc_tsc.c
+SRCS+=	hwpmc_pt.c
 SRCS+=	hwpmc_x86.c hwpmc_uncore.c
 SRCS+=	device_if.h bus_if.h
 .endif
Index: sys/sys/pmc.h
===================================================================
--- sys/sys/pmc.h
+++ sys/sys/pmc.h
@@ -101,6 +101,7 @@
 	__PMC_CPU(INTEL_BROADWELL_XEON, 0x97,   "Intel Broadwell Xeon") \
 	__PMC_CPU(INTEL_SKYLAKE, 0x98,   "Intel Skylake")		\
 	__PMC_CPU(INTEL_SKYLAKE_XEON, 0x99,   "Intel Skylake Xeon")	\
+	__PMC_CPU(INTEL_KABYLAKE, 0x9A,   "Intel Kabylake")	\
 	__PMC_CPU(INTEL_XSCALE,	0x100,	"Intel XScale")		\
 	__PMC_CPU(MIPS_24K,     0x200,  "MIPS 24K")		\
 	__PMC_CPU(MIPS_OCTEON,  0x201,  "Cavium Octeon")	\
@@ -151,7 +152,9 @@
 	__PMC_CLASS(ARMV7,	0x10,	"ARMv7")			\
 	__PMC_CLASS(ARMV8,	0x11,	"ARMv8")			\
 	__PMC_CLASS(MIPS74K,	0x12,	"MIPS 74K")			\
-	__PMC_CLASS(E500,	0x13,	"Freescale e500 class")
+	__PMC_CLASS(E500,	0x13,	"Freescale e500 class")		\
+	__PMC_CLASS(PT,		0x14,	"Intel PT")			\
+	__PMC_CLASS(CORESIGHT,	0x15,	"ARM Coresight")
 
 enum pmc_class {
 #undef  __PMC_CLASS
@@ -160,7 +163,7 @@
 };
 
 #define	PMC_CLASS_FIRST	PMC_CLASS_TSC
-#define	PMC_CLASS_LAST	PMC_CLASS_E500
+#define	PMC_CLASS_LAST	PMC_CLASS_CORESIGHT
 
 /*
  * A PMC can be in the following states:
@@ -231,7 +234,9 @@
 	__PMC_MODE(SS,	0)			\
 	__PMC_MODE(SC,	1)			\
 	__PMC_MODE(TS,	2)			\
-	__PMC_MODE(TC,	3)
+	__PMC_MODE(TC,	3)			\
+	__PMC_MODE(ST,	4)			\
+	__PMC_MODE(TT,	5)
 
 enum pmc_mode {
 #undef	__PMC_MODE
@@ -245,11 +250,11 @@
 #define	PMC_IS_COUNTING_MODE(mode)				\
 	((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC)
 #define	PMC_IS_SYSTEM_MODE(mode)				\
-	((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC)
+	((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC || (mode) == PMC_MODE_ST)
 #define	PMC_IS_SAMPLING_MODE(mode)				\
 	((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS)
 #define	PMC_IS_VIRTUAL_MODE(mode)				\
-	((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC)
+	((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC || (mode) == PMC_MODE_TT)
 
 /*
  * PMC row disposition
@@ -341,7 +346,11 @@
 	__PMC_OP(PMCSTOP, "Stop a PMC")					\
 	__PMC_OP(WRITELOG, "Write a cookie to the log file")		\
 	__PMC_OP(CLOSELOG, "Close log file")				\
-	__PMC_OP(GETDYNEVENTINFO, "Get dynamic events list")
+	__PMC_OP(GETDYNEVENTINFO, "Get dynamic events list")		\
+	__PMC_OP(LOG_KERNEL_MAP, "Log kernel mappings")			\
+	__PMC_OP(THREAD_UNSUSPEND, "Thread unsuspend")			\
+	__PMC_OP(TRACE_READ, "Read trace buffer pointer")		\
+	__PMC_OP(TRACE_CONFIG, "Setup trace IP ranges")
 
 
 enum pmc_ops {
@@ -487,7 +496,6 @@
 	pmc_value_t	pm_value;	/* new&returned value */
 };
 
-
 /*
  * OP GETPMCINFO
  *
@@ -513,6 +521,40 @@
 	struct pmc_info	pm_pmcs[];	/* space for 'npmc' structures */
 };
 
+/*
+ * OP PROC_UNSUSPEND
+ *
+ * Unsuspend all threads of proc.
+ */
+
+struct pmc_op_proc_unsuspend {
+	pmc_id_t	pm_pmcid;
+	pid_t		pm_pid;
+};
+
+/*
+ * OP TRACE_CONFIG
+ */
+
+#define	PMC_FILTER_MAX_IP_RANGES	4
+
+struct pmc_op_trace_config {
+	pmc_id_t	pm_pmcid;
+	uint32_t	pm_cpu;		/* CPU number or PMC_CPU_ANY */
+	uint64_t	ranges[2 * PMC_FILTER_MAX_IP_RANGES];
+	uint32_t	nranges;
+};
+
+/*
+ * OP TRACE_READ
+ */
+
+struct pmc_op_trace_read {
+	pmc_id_t	pm_pmcid;
+	uint32_t	pm_cpu;
+	pmc_value_t	pm_cycle;	/* returned value */
+	pmc_value_t	pm_offset;	/* returned value */
+};
 
 /*
  * OP GETCPUINFO
@@ -520,7 +562,6 @@
  * Retrieve system CPU information.
  */
 
-
 struct pmc_classinfo {
 	enum pmc_class	pm_class;	/* class id */
 	uint32_t	pm_caps;	/* counter capabilities */
@@ -951,6 +992,12 @@
 	int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value);
 	int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value);
 
+	/* trace */
+	int (*pcd_read_trace)(int _cpu, int _ri, struct pmc *_pm,
+	    pmc_value_t *_cycle, pmc_value_t *_offset);
+	int (*pcd_trace_config)(int _cpu, int _ri, struct pmc *_pm,
+	    uint64_t *ranges, uint32_t nranges);
+
 	/* pmc allocation/release */
 	int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t,
 		const struct pmc_op_pmcallocate *_a);
@@ -978,7 +1025,7 @@
  * Machine dependent bits needed per CPU type.
  */
 
-struct pmc_mdep  {
+struct pmc_mdep {
 	uint32_t	pmd_cputype;    /* from enum pmc_cputype */
 	uint32_t	pmd_npmc;	/* number of PMCs per CPU */
 	uint32_t	pmd_nclass;	/* number of PMC classes present */
Index: sys/x86/include/specialreg.h
===================================================================
--- sys/x86/include/specialreg.h
+++ sys/x86/include/specialreg.h
@@ -104,6 +104,7 @@
 #define	XFEATURE_ENABLED_OPMASK		0x00000020
 #define	XFEATURE_ENABLED_ZMM_HI256	0x00000040
 #define	XFEATURE_ENABLED_HI16_ZMM	0x00000080
+#define	XFEATURE_ENABLED_PT		0x00000100
 
 #define	XFEATURE_AVX					\
     (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
Index: usr.sbin/Makefile
===================================================================
--- usr.sbin/Makefile
+++ usr.sbin/Makefile
@@ -183,6 +183,7 @@
 SUBDIR.${MK_PMC}+=	pmccontrol
 SUBDIR.${MK_PMC}+=	pmcstat
 SUBDIR.${MK_PMC}+=	pmcstudy
+SUBDIR.${MK_PMC}+=	pmctrace
 SUBDIR.${MK_PORTSNAP}+=	portsnap
 SUBDIR.${MK_PPP}+=	ppp
 SUBDIR.${MK_QUOTAS}+=	edquota
Index: usr.sbin/pmctrace/Makefile
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/Makefile
@@ -0,0 +1,22 @@
+#	@(#)Makefile	8.1 (Berkeley) 6/6/93
+# $FreeBSD$
+
+PROG=	pmctrace
+SRCS=	pmctrace.c
+MAN=
+
+LIBADD= elf pmc pmcstat
+
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+=	pmctrace_pt.c		\
+	pmctrace_pt.h
+LIBADD+= ipt
+.endif
+
+.if ${MACHINE_CPUARCH} == "aarch64"
+SRCS+=	pmctrace_cs.c		\
+	pmctrace_cs.h
+LIBADD+= opencsd
+.endif
+
+.include <bsd.prog.mk>
Index: usr.sbin/pmctrace/pmctrace.h
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/pmctrace.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMCTRACE_H_
+#define _PMCTRACE_H_
+
+/* Per-CPU decoder state shared with the trace backend (PT/CoreSight). */
+struct mtrace_data {
+	uint64_t ip;			/* last decoded instruction pointer */
+	int cpu;			/* CPU this state belongs to */
+	struct pmcstat_process *pp;	/* process being traced */
+	uint32_t flags;			/* backend-specific flags */
+};
+
+/* One mapped trace buffer per CPU (/dev/pmcN). */
+struct trace_cpu {
+	uint32_t cycle;			/* buffer wrap counter */
+	uint64_t offset;		/* read offset within the buffer */
+	struct mtrace_data mdata;	/* decoder state for this CPU */
+	uint32_t bufsize;		/* mapped size in bytes */
+	void *base;			/* mmap'ed buffer base address */
+	int fd;				/* open /dev/pmcN descriptor */
+};
+
+/* Backend entry points; one implementation per trace technology. */
+struct trace_dev_methods {
+	/* Decode newly produced trace data up to (cycle, offset). */
+	int (*process)(struct trace_cpu *, struct pmcstat_process *,
+	    uint32_t cpu, uint32_t cycle, uint64_t offset);
+	/* Optional per-CPU initialization hook. */
+	int (*init)(struct trace_cpu *tc);
+	/* Optional handler for device-specific command-line options. */
+	int (*option)(int option);
+};
+
+/* A selectable trace backend, keyed by event-spec prefix. */
+struct trace_dev {
+	const char *ev_spec;		/* spec prefix, e.g. "pt" */
+	struct trace_dev_methods *methods;
+};
+
+struct pmctrace_config {
+	struct trace_dev *trace_dev;	/* backend selected via -u/-s */
+	uint32_t flags;
+};
+
+#endif /* !_PMCTRACE_H_ */
Index: usr.sbin/pmctrace/pmctrace.c
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/pmctrace.c
@@ -0,0 +1,683 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/event.h>
+#include <sys/cpuset.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/sysctl.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/signal.h>
+#include <sys/types.h>
+
+#include <assert.h>
+#include <signal.h>
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <libgen.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <sysexits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <libpmcstat.h>
+
+#include "pmctrace.h"
+#if defined(__amd64__)
+#include "pmctrace_pt.h"
+#endif
+#if defined(__aarch64__)
+#include "pmctrace_cs.h"
+#endif
+
+#define	MAX_CPU	4096
+
+#define	PMCTRACE_DEBUG
+#undef	PMCTRACE_DEBUG
+
+#ifdef	PMCTRACE_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+static struct pmcstat_args args;
+static struct kevent kev;
+static struct pmcstat_process *pmcstat_kernproc;
+static struct pmcstat_stats pmcstat_stats;
+static struct trace_cpu *trace_cpus[MAX_CPU];
+static struct pmc_plugins plugins[] = {};
+
+static int pmcstat_sockpair[NSOCKPAIRFD];
+static int pmcstat_kq;
+static int pmcstat_npmcs;
+static int pmcstat_mergepmc;
+static int ps_samples_period;
+
+struct pmcstat_image_hash_list pmcstat_image_hash[PMCSTAT_NHASH];
+struct pmcstat_process_hash_list pmcstat_process_hash[PMCSTAT_NHASH];
+struct pmcstat_pmcs pmcstat_pmcs = LIST_HEAD_INITIALIZER(pmcstat_pmcs);
+
+static struct trace_dev trace_devs[] = {
+#if defined(__amd64__)
+	{ "pt",		&ipt_methods },
+#elif defined(__aarch64__)
+	{ "coresight",	&cs_methods },
+#endif
+	{ NULL,	NULL }
+};
+
+static struct pmctrace_config pmctrace_cfg;
+
+/* Return the number of CPUs in the system, or -1 if the sysctl fails. */
+static int
+pmctrace_ncpu(void)
+{
+	size_t len;
+	int ncpu;
+
+	len = sizeof(ncpu);
+	if (sysctlbyname("hw.ncpu", &ncpu, &len, NULL, 0) != 0)
+		return (-1);
+
+	return (ncpu);
+}
+
+/*
+ * Open /dev/pmc<cpu>, map its 16 MB trace buffer read-only and run the
+ * backend's per-CPU init hook, if any.  Returns 0 on success, -1 on
+ * failure (error already reported).
+ */
+static int
+pmctrace_init_cpu(uint32_t cpu)
+{
+	struct trace_cpu *tc;
+	char filename[32];
+	struct mtrace_data *mdata;
+
+	tc = trace_cpus[cpu];
+	mdata = &tc->mdata;
+	mdata->ip = 0;
+	mdata->cpu = cpu;
+
+	/* snprintf bounds the write; %u matches the unsigned cpu id. */
+	snprintf(filename, sizeof(filename), "/dev/pmc%u", cpu);
+
+	tc->fd = open(filename, O_RDWR);
+	if (tc->fd < 0) {
+		printf("Can't open %s\n", filename);
+		return (-1);
+	}
+
+	tc->bufsize = 16 * 1024 * 1024;
+	tc->cycle = 0;
+	tc->offset = 0;
+
+	tc->base = mmap(NULL, tc->bufsize, PROT_READ, MAP_SHARED, tc->fd, 0);
+	if (tc->base == MAP_FAILED) {
+		printf("mmap failed: err %d\n", errno);
+		/* Don't leak the descriptor on the error path. */
+		close(tc->fd);
+		tc->fd = -1;
+		return (-1);
+	}
+	dprintf("%s: tc->base %lx, *tc->base %lx\n", __func__,
+	    (uint64_t)tc->base, *(uint64_t *)tc->base);
+
+	if (pmctrace_cfg.trace_dev->methods->init != NULL)
+		pmctrace_cfg.trace_dev->methods->init(tc);
+
+	return (0);
+}
+
+/*
+ * Read the current trace-buffer write cursor (wrap cycle + offset) for
+ * this CPU and hand the newly produced data to the backend decoder.
+ * Always returns 0.
+ */
+static int
+pmctrace_process_cpu(int cpu, struct pmcstat_ev *ev)
+{
+	struct pmcstat_process *pp;
+	struct pmcstat_target *pt;
+	pmc_value_t offset;
+	pmc_value_t cycle;
+	struct trace_cpu *tc;
+	struct trace_dev *trace_dev;
+
+	trace_dev = pmctrace_cfg.trace_dev;
+	tc = trace_cpus[cpu];
+
+	/* NOTE(review): return value unchecked -- cycle/offset could be
+	 * stale if the read fails; confirm pmc_read_trace() semantics. */
+	pmc_read_trace(cpu, ev->ev_pmcid, &cycle, &offset);
+
+	dprintf("cpu %d cycle %lx offset %lx\n", cpu, cycle, offset);
+
+	/* Decode against the attached target if any, else the kernel. */
+	pt = SLIST_FIRST(&args.pa_targets);
+	if (pt != NULL)
+		pp = pmcstat_process_lookup(pt->pt_pid, 0);
+	else
+		pp = pmcstat_kernproc;
+
+	if (pp)
+		trace_dev->methods->process(tc, pp, cpu, cycle, offset);
+	else
+		dprintf("pp not found\n");
+
+	return (0);
+}
+
+/*
+ * Decode pending trace data: every CPU when tracing a user process
+ * (the thread may migrate), otherwise only the CPUs owning an event.
+ * Always returns 0.
+ */
+static int
+pmctrace_process_all(int user_mode)
+{
+	struct pmcstat_ev *ev;
+	int cpu;
+	int ncpu;
+
+	ncpu = pmctrace_ncpu();
+	if (ncpu < 0)
+		errx(EX_SOFTWARE, "ERROR: Can't get cpus\n");
+
+	if (user_mode) {
+		ev = STAILQ_FIRST(&args.pa_events);
+		for (cpu = 0; cpu < ncpu; cpu++)
+			pmctrace_process_cpu(cpu, ev);
+	} else {
+		STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+			pmctrace_process_cpu(ev->ev_cpu, ev);
+	}
+
+	return (0);
+}
+
+/*
+ * Stop and release every allocated PMC, detach the log pipe and flush
+ * libpmcstat's logging state.  Called on fatal errors and at exit.
+ */
+static void
+pmctrace_cleanup(void)
+{
+	struct pmcstat_ev *ev;
+
+	/* release allocated PMCs. */
+	STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+		if (ev->ev_pmcid != PMC_ID_INVALID) {
+			if (pmc_stop(ev->ev_pmcid) < 0)
+				err(EX_OSERR,
+				    "ERROR: cannot stop pmc 0x%x \"%s\"",
+				    ev->ev_pmcid, ev->ev_name);
+			if (pmc_release(ev->ev_pmcid) < 0)
+				err(EX_OSERR,
+				    "ERROR: cannot release pmc 0x%x \"%s\"",
+				    ev->ev_pmcid, ev->ev_name);
+		}
+
+	/* de-configure the log file if present. */
+	if (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE))
+		(void) pmc_configure_logfile(-1);
+
+	if (args.pa_logparser) {
+		pmclog_close(args.pa_logparser);
+		args.pa_logparser = NULL;
+	}
+
+	pmcstat_shutdown_logging(&args, plugins, &pmcstat_stats);
+}
+
+/* Start every allocated PMC; on failure release everything and exit. */
+static void
+pmctrace_start_pmcs(void)
+{
+	struct pmcstat_ev *ev;
+
+	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
+		dprintf("starting ev->ev_cpu %d\n", ev->ev_cpu);
+		assert(ev->ev_pmcid != PMC_ID_INVALID);
+		if (pmc_start(ev->ev_pmcid) >= 0)
+			continue;
+		warn("ERROR: Cannot start pmc 0x%x \"%s\"",
+		    ev->ev_pmcid, ev->ev_name);
+		pmctrace_cleanup();
+		exit(EX_OSERR);
+	}
+}
+
+/*
+ * Route the PMC log through a pipe so it can be parsed on the fly:
+ * the kernel writes the log to one end, we read the other end via
+ * kqueue.  Always returns 0 (fatal errors exit via err()).
+ */
+static int
+pmctrace_open_logfile(void)
+{
+	int pipefd[2];
+
+	/*
+	 * process the log on the fly by reading it in
+	 * through a pipe.
+	 */
+	if (pipe(pipefd) < 0)
+		err(EX_OSERR, "ERROR: pipe(2) failed");
+
+	if (fcntl(pipefd[READPIPEFD], F_SETFL, O_NONBLOCK) < 0)
+		err(EX_OSERR, "ERROR: fcntl(2) failed");
+
+	/* Wake the event loop whenever log data is ready to parse. */
+	EV_SET(&kev, pipefd[READPIPEFD], EVFILT_READ, EV_ADD,
+	    0, 0, NULL);
+
+	if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+		err(EX_OSERR, "ERROR: Cannot register kevent");
+
+	args.pa_logfd = pipefd[WRITEPIPEFD];
+	args.pa_flags |= FLAG_HAS_PIPE;
+	args.pa_logparser = pmclog_open(pipefd[READPIPEFD]);
+
+	if (pmc_configure_logfile(args.pa_logfd) < 0)
+		err(EX_OSERR, "ERROR: Cannot configure log file");
+
+	return (0);
+}
+
+/*
+ * Resolve args.pa_kernel to the directory containing the running
+ * kernel, starting from the kern.bootfile sysctl.  If the sysctl
+ * yields a file path, its dirname is used.  Always returns 0 (fatal
+ * errors exit via err()/errx()).
+ */
+static int
+pmctrace_find_kernel(void)
+{
+	struct stat sb;
+	char buffer[PATH_MAX];
+	size_t len;
+	char *tmp;
+
+	/* Default to using the running system kernel. */
+	len = 0;
+	/* First call sizes the buffer, second fills it. */
+	if (sysctlbyname("kern.bootfile", NULL, &len, NULL, 0) == -1)
+		err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
+	args.pa_kernel = malloc(len);
+	if (args.pa_kernel == NULL)
+		errx(EX_SOFTWARE, "ERROR: Out of memory.");
+	if (sysctlbyname("kern.bootfile", args.pa_kernel, &len, NULL, 0) == -1)
+		err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
+
+	/*
+	 * Check if 'kerneldir' refers to a file rather than a
+	 * directory.  If so, use `dirname path` to determine the
+	 * kernel directory.
+	 */
+	(void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot,
+	    args.pa_kernel);
+	if (stat(buffer, &sb) < 0)
+		err(EX_OSERR, "ERROR: Cannot locate kernel \"%s\"",
+		    buffer);
+	if (!S_ISREG(sb.st_mode) && !S_ISDIR(sb.st_mode))
+		errx(EX_USAGE, "ERROR: \"%s\": Unsupported file type.",
+		    buffer);
+	if (!S_ISDIR(sb.st_mode)) {
+		tmp = args.pa_kernel;
+		/* dirname(3) may modify/return static storage: dup it. */
+		args.pa_kernel = strdup(dirname(args.pa_kernel));
+		if (args.pa_kernel == NULL)
+			errx(EX_SOFTWARE, "ERROR: Out of memory");
+		free(tmp);
+		(void) snprintf(buffer, sizeof(buffer), "%s%s",
+		    args.pa_fsroot, args.pa_kernel);
+		if (stat(buffer, &sb) < 0)
+			err(EX_OSERR, "ERROR: Cannot stat \"%s\"",
+			    buffer);
+		if (!S_ISDIR(sb.st_mode))
+			errx(EX_USAGE,
+			    "ERROR: \"%s\" is not a directory.",
+			    buffer);
+	}
+
+	return (0);
+}
+
+/*
+ * Initialize *cpumask to this process' root CPU set, which is
+ * normally every CPU in the system.
+ */
+static void
+pmctrace_setup_cpumask(cpuset_t *cpumask)
+{
+	cpuset_t rootmask;
+
+	/*
+	 * The initial CPU mask specifies the root mask of this process
+	 * which is usually all CPUs in the system.
+	 */
+	if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
+	    sizeof(rootmask), &rootmask) == -1)
+		err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs");
+	CPU_COPY(&rootmask, cpumask);
+}
+
+/*
+ * Resolve the -f/-i function filter to an address range, program the
+ * IP-range filter on every relevant PMC and start the PMCs.  Returns 1
+ * once started; 0 when no filter was requested or when the symbol is
+ * not yet known -- in that case the caller retries after more of the
+ * log has been analyzed.
+ */
+static int
+pmctrace_delayed_start(bool user_mode, char *func_name, char *func_image)
+{
+	uint64_t ranges[2];
+	struct pmcstat_symbol *sym;
+	struct pmcstat_target *pt;
+	struct pmcstat_process *pp;
+	struct pmcstat_ev *ev;
+	uintptr_t addr_start;
+	uintptr_t addr_end;
+	int ncpu;
+	int i;
+
+	if (func_name == NULL || func_image == NULL)
+		return (0);
+
+	ncpu = pmctrace_ncpu();
+	if (ncpu < 0)
+		errx(EX_SOFTWARE, "ERROR: Can't get cpus\n");
+
+	if (user_mode) {
+		pt = SLIST_FIRST(&args.pa_targets);
+		if (pt == NULL)
+			errx(EX_SOFTWARE, "ERROR: can't get target.");
+		pp = pmcstat_process_lookup(pt->pt_pid, 0);
+		if (pp == NULL)
+			errx(EX_SOFTWARE, "ERROR: pp is NULL, pid %d\n",
+			    (uint32_t)pt->pt_pid);
+	} else
+		pp = pmcstat_kernproc;
+
+	/* The symbol may not appear until more log records are parsed. */
+	sym = pmcstat_symbol_search_by_name(pp, func_image, func_name,
+	    &addr_start, &addr_end);
+	if (!sym)
+		return (0);
+
+	dprintf("%s: SYM addr start %lx end %lx\n",
+	    __func__, addr_start, addr_end);
+
+	/* NOTE(review): nranges=1 below presumably counts start/end
+	 * pairs, not array entries -- confirm against pmc_trace_config(). */
+	ranges[0] = addr_start;
+	ranges[1] = addr_end;
+
+	if (user_mode) {
+		ev = STAILQ_FIRST(&args.pa_events);
+		for (i = 0; i < ncpu; i++)
+			pmc_trace_config(i, ev->ev_pmcid, &ranges[0], 1);
+	} else {
+		STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+			pmc_trace_config(ev->ev_cpu,
+			    ev->ev_pmcid, &ranges[0], 1);
+	}
+
+	pmctrace_start_pmcs();
+
+	return (1);
+}
+
+
+/*
+ * Main event loop: launch the target process (user mode), start or arm
+ * the PMCs, then service kqueue events until the target exits and the
+ * remaining timer ticks have drained the trace buffers.
+ */
+static int
+pmctrace_run(bool user_mode, char *func_name, char *func_image)
+{
+	struct pmcstat_target *pt;
+	struct pmcstat_process *pp;
+	struct pmcstat_ev *ev;
+	int stopping;
+	int running;
+	int started;
+	int c;
+
+	stopping = 0;
+	/* Allow a few timer ticks after target exit to drain buffers. */
+	running = 3;
+	started = 0;
+
+	if (user_mode) {
+		pmcstat_create_process(pmcstat_sockpair, &args, pmcstat_kq);
+		pmcstat_attach_pmcs(&args);
+		if (func_name == NULL || func_image == NULL) {
+			pmctrace_start_pmcs();
+			started = 1;
+		}
+		pmcstat_start_process(pmcstat_sockpair);
+	} else {
+		if (func_name == NULL || func_image == NULL) {
+			pmctrace_start_pmcs();
+			started = 1;
+		} else {
+			/*
+			 * Delayed start: request kernel mappings first so
+			 * the -f/-i filter symbol can be resolved before
+			 * the PMCs are started.  (The previous dead store
+			 * of STAILQ_FIRST into 'ev' here was removed.)
+			 */
+			STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+				pmc_log_kmap(ev->ev_pmcid);
+		}
+	}
+
+	do {
+		if ((c = kevent(pmcstat_kq, NULL, 0, &kev, 1, NULL)) <= 0) {
+			if (errno != EINTR)
+				err(EX_OSERR, "ERROR: kevent failed");
+			else
+				continue;
+		}
+
+		dprintf("%s: pmcstat event: filter %d, ident %ld\n",
+		    __func__, kev.filter, kev.ident);
+
+		if (kev.flags & EV_ERROR)
+			errc(EX_OSERR, kev.data, "ERROR: kevent failed");
+
+		switch (kev.filter) {
+		case EVFILT_PROC:
+			/* Target exited; keep draining for a few ticks. */
+			stopping = 1;
+			break;
+		case EVFILT_READ:
+			args.pa_flags |= FLAG_DO_ANALYSIS;
+			pmcstat_analyze_log(&args, plugins, &pmcstat_stats,
+			    pmcstat_kernproc, pmcstat_mergepmc, &pmcstat_npmcs,
+			    &ps_samples_period);
+
+			if (started == 0 &&
+			    pmctrace_delayed_start(user_mode, func_name,
+			    func_image) == 1)
+				started = 1;
+
+			if (user_mode) {
+				pt = SLIST_FIRST(&args.pa_targets);
+				ev = STAILQ_FIRST(&args.pa_events);
+				pmc_proc_unsuspend(ev->ev_pmcid, pt->pt_pid);
+			}
+			break;
+		case EVFILT_TIMER:
+			pmc_flush_logfile();
+
+			pp = pmcstat_kernproc;
+			/* Kernel mappings may not be logged yet. */
+			if (!user_mode && TAILQ_EMPTY(&pp->pp_map))
+				break;
+
+			pmctrace_process_all(user_mode);
+
+			if (stopping)
+				running -= 1;
+			break;
+		}
+	} while (running > 0);
+
+	return (0);
+}
+
+/* Print usage, including the previously undocumented -i/-f/-t flags. */
+static void
+usage(void)
+{
+
+	errx(EX_USAGE,
+		"[options] [commandline]\n"
+		"\t -s device\t\tTrace kernel\n"
+		"\t -u device\t\tTrace userspace\n"
+		"\t -i image\t\tLimit trace to a function in this image\n"
+		"\t -f function\t\tLimit trace to this function\n"
+		"\t -t\t\t\tPass a device-specific option\n"
+	);
+}
+
+/*
+ * pmctrace: allocate a hardware tracing PMC (Intel PT or ARM
+ * CoreSight), run it in kernel (-s) or userspace (-u) mode, and decode
+ * the per-CPU trace buffers while the log is being produced.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct pmcstat_ev *ev;
+	bool user_mode;
+	bool supervisor_mode;
+	int option;
+	cpuset_t cpumask;
+	char *func_name;
+	char *func_image;
+	int ncpu;
+	int i;
+
+	bzero(&args, sizeof(struct pmcstat_args));
+	bzero(&pmctrace_cfg, sizeof(struct pmctrace_config));
+
+	func_name = NULL;
+	func_image = NULL;
+
+	/*
+	 * BUGFIX: 'ev' was read uninitialized by the duplicate -u/-s
+	 * check in the option loop below (undefined behavior); it must
+	 * start out NULL.
+	 */
+	ev = NULL;
+
+	user_mode = 0;
+	supervisor_mode = 0;
+
+	STAILQ_INIT(&args.pa_events);
+	SLIST_INIT(&args.pa_targets);
+	CPU_ZERO(&cpumask);
+
+	args.pa_fsroot = strdup("/");	/* TODO */
+
+	pmctrace_find_kernel();
+	pmctrace_setup_cpumask(&cpumask);
+
+	while ((option = getopt(argc, argv,
+	    "htu:s:i:f:")) != -1) {
+		switch (option) {
+		case 'i':
+			func_image = strdup(optarg);
+			break;
+		case 'f':
+			func_name = strdup(optarg);
+			break;
+		case 'u':
+		case 's':
+			/* Only one trace specification is accepted. */
+			if (ev != NULL)
+				usage();
+
+			if ((ev = malloc(sizeof(struct pmcstat_ev))) == NULL)
+				errx(EX_SOFTWARE, "ERROR: Out of memory.");
+			if (option == 'u') {
+				user_mode = 1;
+				ev->ev_mode = PMC_MODE_TT;
+				args.pa_flags |= FLAG_HAS_PROCESS_PMCS;
+			} else {
+				ev->ev_mode = PMC_MODE_ST;
+				supervisor_mode = 1;
+			}
+			ev->ev_spec = strdup(optarg);
+			if (ev->ev_spec == NULL)
+				errx(EX_SOFTWARE, "ERROR: Out of memory.");
+
+			/* Match the spec prefix against known backends. */
+			for (i = 0; trace_devs[i].ev_spec != NULL; i++) {
+				if (strncmp(trace_devs[i].ev_spec, ev->ev_spec,
+				    strlen(trace_devs[i].ev_spec)) == 0) {
+					/* found */
+					pmctrace_cfg.trace_dev = &trace_devs[i];
+					break;
+				}
+			}
+
+			if (pmctrace_cfg.trace_dev == NULL)
+				errx(EX_SOFTWARE, "ERROR: trace device not found");
+			break;
+		case 't':
+			/* -t needs a backend, so it must follow -u/-s. */
+			if (ev == NULL)
+				usage();
+
+			if (pmctrace_cfg.trace_dev->methods->option != NULL)
+				pmctrace_cfg.trace_dev->methods->option(option);
+			break;
+		case 'h':
+			usage();
+		default:
+			break;
+		}
+	}
+
+	if ((user_mode == 0 && supervisor_mode == 0) ||
+	    (user_mode == 1 && supervisor_mode == 1))
+		errx(EX_USAGE, "ERROR: specify -u or -s");
+
+	if ((func_image == NULL && func_name != NULL) ||
+	    (func_image != NULL && func_name == NULL))
+		errx(EX_USAGE, "ERROR: specify both or neither -i and -f");
+
+	args.pa_argc = (argc -= optind);
+	args.pa_argv = (argv += optind);
+	args.pa_cpumask = cpumask;
+
+	if (user_mode && !argc)
+		errx(EX_USAGE, "ERROR: user mode requires command to be specified");
+	if (supervisor_mode && argc)
+		errx(EX_USAGE, "ERROR: supervisor mode does not require command");
+
+	args.pa_required |= (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE);
+
+	ev->ev_saved = 0LL;
+	ev->ev_pmcid = PMC_ID_INVALID;
+	ev->ev_name = strdup("pmctrace");
+	ev->ev_flags = 0;
+
+	if (!user_mode)
+		ev->ev_cpu = CPU_FFS(&cpumask) - 1;
+	else
+		ev->ev_cpu = PMC_CPU_ANY;
+
+	STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next);
+
+	if (!user_mode) {
+		/* System mode: clone the event onto every other CPU. */
+		CPU_CLR(ev->ev_cpu, &cpumask);
+		pmcstat_clone_event_descriptor(ev, &cpumask, &args);
+		CPU_SET(ev->ev_cpu, &cpumask);
+	}
+
+	ncpu = pmctrace_ncpu();
+	if (ncpu < 0)
+		errx(EX_SOFTWARE, "ERROR: Can't get cpus\n");
+
+	if (pmc_init() < 0)
+		err(EX_UNAVAILABLE, "ERROR: Initialization of the pmc(3) library failed");
+
+	if ((pmcstat_kq = kqueue()) < 0)
+		err(EX_OSERR, "ERROR: Cannot allocate kqueue");
+
+	pmctrace_open_logfile();
+
+	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
+		if (pmc_allocate(ev->ev_spec, ev->ev_mode,
+			ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0)
+			err(EX_OSERR,
+			    "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"",
+			    PMC_IS_SYSTEM_MODE(ev->ev_mode) ?
+			    "system" : "process", ev->ev_spec);
+	}
+
+	for (i = 0; i < ncpu; i++) {
+		trace_cpus[i] = malloc(sizeof(struct trace_cpu));
+		pmctrace_init_cpu(i);
+	}
+
+	/* Periodic timer (data=100, milliseconds) drains trace buffers. */
+	EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, 100, NULL);
+
+	if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+		err(EX_OSERR, "ERROR: Cannot register kevent for timer");
+
+	pmcstat_initialize_logging(&pmcstat_kernproc,
+	    &args, plugins, &pmcstat_npmcs, &pmcstat_mergepmc);
+
+	pmctrace_run(user_mode, func_name, func_image);
+
+	return (0);
+}
Index: usr.sbin/pmctrace/pmctrace_cs.h
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/pmctrace_cs.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMCTRACE_CS_H_
+#define _PMCTRACE_CS_H_
+
+extern struct trace_dev_methods cs_methods;
+
+#endif /* !_PMCTRACE_CS_H_ */
Index: usr.sbin/pmctrace/pmctrace_cs.c
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/pmctrace_cs.c
@@ -0,0 +1,535 @@
+/*-
+ * Copyright (c) 2018 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+ 
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <sys/ttycom.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <curses.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <kvm.h>
+#include <libgen.h>
+#include <limits.h>
+#include <math.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <gelf.h>
+#include <inttypes.h>
+
+#include <libpmcstat.h>
+
+#include "pmctrace.h"
+#include "pmctrace_cs.h"
+
+#include <opencsd/c_api/ocsd_c_api_types.h>
+#include <opencsd/c_api/opencsd_c_api.h>
+
+#define	PMCTRACE_CS_DEBUG
+#undef	PMCTRACE_CS_DEBUG
+
+#ifdef	PMCTRACE_CS_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+#define	PACKET_STR_LEN	1024
+static char packet_str[PACKET_STR_LEN];
+
+static dcd_tree_handle_t dcdtree_handle;
+
+static int cs_init(struct trace_cpu *tc);
+static int cs_flags;
+#define	FLAG_FORMAT			(1 << 0)
+#define	FLAG_FRAME_RAW_UNPACKED		(1 << 1)
+#define	FLAG_FRAME_RAW_PACKED		(1 << 2)
+#define	FLAG_CALLBACK_MEM_ACC		(1 << 3)
+
+static struct pmcstat_symbol *
+symbol_lookup(const struct mtrace_data *mdata, uint64_t ip, struct pmcstat_image **img)
+{
+	struct pmcstat_image *image;
+	struct pmcstat_symbol *sym;
+	struct pmcstat_pcmap *map;
+	uint64_t newpc;
+
+	map = pmcstat_process_find_map(mdata->pp, ip);
+	if (map != NULL) {
+		image = map->ppm_image;
+		newpc = ip - (map->ppm_lowpc +
+		    (image->pi_vaddr - image->pi_start));
+
+		sym = pmcstat_symbol_search(image, newpc);
+		*img = image;
+
+		if (sym == NULL)
+			dprintf("cpu%d: symbol 0x%lx not found\n", mdata->cpu, newpc);
+
+		return (sym);
+	} else {
+		dprintf("cpu%d: 0x%lx map not found\n", mdata->cpu, ip);
+	}
+
+	return (NULL);
+}
+
+static ocsd_err_t
+attach_raw_printers(dcd_tree_handle_t dcd_tree_h)
+{
+	ocsd_err_t err;
+	int flags;
+
+	flags = 0;
+	err = OCSD_OK;
+
+	if (cs_flags & FLAG_FRAME_RAW_UNPACKED)
+		flags |= OCSD_DFRMTR_UNPACKED_RAW_OUT;
+
+	if (cs_flags & FLAG_FRAME_RAW_PACKED)
+		flags |= OCSD_DFRMTR_PACKED_RAW_OUT;
+
+	if (flags)
+		err = ocsd_dt_set_raw_frame_printer(dcd_tree_h, flags);
+
+	return err;
+}
+
/*
 * Render up to array_size bytes as "[ 0xAB 0xCD ... ];" into p_buffer
 * (capacity buf_size, including the NUL terminator).  Returns the number
 * of characters produced.
 *
 * Fixed: the original checked the remaining space only AFTER writing each
 * "0xXX " chunk with sprintf() and appended the trailing "];" with an
 * unbounded strcat(), so buffer sizes near the boundary overflowed by up
 * to one chunk.  The space check now happens before each write and
 * reserves room for the closing "];" plus the NUL.
 */
static int
print_data_array(const uint8_t *p_array, const int array_size,
    char *p_buffer, int buf_size)
{
	int bytes_processed;
	int chars_printed;

	chars_printed = 0;
	p_buffer[0] = 0;

	if (buf_size > 9) {
		strcat(p_buffer, "[ ");
		chars_printed += 2;

		for (bytes_processed = 0; bytes_processed < array_size;
		    bytes_processed++) {
			/* Need 5 chars for "0xXX ", 2 for "];", 1 for NUL. */
			if ((chars_printed + 5 + 3) > buf_size)
				break;
			snprintf(p_buffer + chars_printed,
			    buf_size - chars_printed, "0x%02X ",
			    p_array[bytes_processed]);
			chars_printed += 5;
		}

		strcat(p_buffer, "];");
		chars_printed += 2;
	} else if (buf_size >= 4) {
		snprintf(p_buffer, buf_size, "[];");
		chars_printed += 3;
	}

	return (chars_printed);
}
+
+static void
+packet_monitor(void *context __unused,
+    const ocsd_datapath_op_t op,
+    const ocsd_trc_index_t index_sop,
+    const void *p_packet_in,
+    const uint32_t size,
+    const uint8_t *p_data)
+{
+	int offset;
+
+	offset = 0;
+ 
+	switch(op) {
+	case OCSD_OP_DATA:
+		sprintf(packet_str, "Idx:%"  OCSD_TRC_IDX_STR ";", index_sop);
+		offset = strlen(packet_str);
+		offset += print_data_array(p_data, size, packet_str + offset,
+		    PACKET_STR_LEN - offset);
+
+		/*
+		 * Got a packet -- convert to string and use the libraries'
+		 * message output to print to file and stdoout
+		 */
+
+		if (ocsd_pkt_str(OCSD_PROTOCOL_ETMV4I, p_packet_in, packet_str + offset,
+		    PACKET_STR_LEN - offset) == OCSD_OK) {
+			/* add in <CR> */
+			if (strlen(packet_str) == PACKET_STR_LEN - 1) /* maximum length */
+				packet_str[PACKET_STR_LEN - 2] = '\n';
+			else
+				strcat(packet_str,"\n");
+
+			/* print it using the library output logger. */
+			ocsd_def_errlog_msgout(packet_str);
+		}
+		break;
+
+	case OCSD_OP_EOT:
+		sprintf(packet_str,"**** END OF TRACE ****\n");
+		ocsd_def_errlog_msgout(packet_str);
+		break;
+	default:
+		printf("%s: unknown op %d\n", __func__, op);
+		break;
+	}
+}
+
+static uint32_t
+cs_cs_decoder__mem_access(const void *context __unused,
+    const ocsd_vaddr_t address __unused, const ocsd_mem_space_acc_t mem_space __unused,
+    const uint32_t req_size __unused, uint8_t *buffer __unused)
+{
+
+	/* TODO */
+
+	return (0);
+}
+
+static ocsd_err_t
+create_test_memory_acc(dcd_tree_handle_t handle, uint64_t base, uint64_t start, uint64_t end)
+{
+	ocsd_vaddr_t address;
+	uint8_t *p_mem_buffer;
+	uint32_t mem_length;
+	int ret;
+
+	dprintf("%s: base %lx start %lx end %lx\n", __func__, base, start, end);
+
+	address = (ocsd_vaddr_t)base;
+	p_mem_buffer = (uint8_t *)(base + start);
+	mem_length = (end-start);
+
+	if (cs_flags & FLAG_CALLBACK_MEM_ACC)
+		ret = ocsd_dt_add_callback_mem_acc(handle, base+start, base+end-1,
+		    OCSD_MEM_SPACE_ANY, cs_cs_decoder__mem_access, NULL);
+	else
+		ret = ocsd_dt_add_buffer_mem_acc(handle, address, OCSD_MEM_SPACE_ANY,
+		    p_mem_buffer, mem_length);
+
+	if (ret != OCSD_OK)
+		printf("%s: can't create memory accessor: ret %d\n", __func__, ret);
+
+	return (ret);
+}
+
+static ocsd_err_t
+create_generic_decoder(dcd_tree_handle_t handle, const char *p_name, const void *p_cfg,
+    const void *p_context __unused, uint64_t base, uint64_t start, uint64_t end)
+{ 
+	ocsd_err_t ret;
+	uint8_t CSID;
+
+	CSID = 0;
+
+	dprintf("%s\n", __func__);
+
+	ret = ocsd_dt_create_decoder(handle, p_name, OCSD_CREATE_FLG_FULL_DECODER,
+	    p_cfg, &CSID);
+	if(ret != OCSD_OK)
+		return (-1);
+
+	if (cs_flags & FLAG_FORMAT) {
+		ret = ocsd_dt_attach_packet_callback(handle, CSID,
+		    OCSD_C_API_CB_PKT_MON, packet_monitor, p_context);
+		if (ret != OCSD_OK)
+			return (-1);
+	}
+
+	/* attach a memory accessor */
+	ret = create_test_memory_acc(handle, base, start, end);
+	if(ret != OCSD_OK)
+		ocsd_dt_remove_decoder(handle,CSID);
+
+	return (ret);
+}
+
+static ocsd_err_t
+create_decoder_etmv4(dcd_tree_handle_t dcd_tree_h, uint64_t base,
+    uint64_t start, uint64_t end)
+{
+	ocsd_etmv4_cfg trace_config;
+	ocsd_err_t ret;
+
+	trace_config.arch_ver = ARCH_V8;
+	trace_config.core_prof = profile_CortexA;
+
+	trace_config.reg_configr = 0x000000C1;
+	trace_config.reg_traceidr = 0x00000010;   /* Trace ID */
+
+	trace_config.reg_idr0   = 0x28000EA1;
+	trace_config.reg_idr1   = 0x4100F403;
+	trace_config.reg_idr2   = 0x00000488;
+	trace_config.reg_idr8   = 0x0;
+	trace_config.reg_idr9   = 0x0;
+	trace_config.reg_idr10  = 0x0;
+	trace_config.reg_idr11  = 0x0;
+	trace_config.reg_idr12  = 0x0;
+	trace_config.reg_idr13  = 0x0;
+
+	ret = create_generic_decoder(dcd_tree_h, OCSD_BUILTIN_DCD_ETMV4I,
+	    (void *)&trace_config, 0, base, start, end);
+	return (ret);
+}
+
+static ocsd_datapath_resp_t
+gen_trace_elem_print_lookup(const void *p_context, const ocsd_trc_index_t index_sop __unused,
+    const uint8_t trc_chan_id __unused, const ocsd_generic_trace_elem *elem __unused)
+{ 
+	const struct mtrace_data *mdata;
+	ocsd_datapath_resp_t resp;
+	struct pmcstat_symbol *sym;
+	struct pmcstat_image *image;
+
+	mdata = (const struct mtrace_data *)p_context;
+
+	resp = OCSD_RESP_CONT;
+
+#if 0
+	dprintf("%s: Idx:%d ELEM TYPE %d, st_addr %lx, en_addr %lx\n",
+	    __func__, index_sop, elem->elem_type, elem->st_addr, elem->en_addr);
+#endif
+
+	if (elem->st_addr == 0)
+		return (0);
+	sym = symbol_lookup(mdata, elem->st_addr, &image);
+	if (sym)
+		printf("cpu%d:  IP 0x%lx %s %s\n", mdata->cpu, elem->st_addr,
+		    pmcstat_string_unintern(image->pi_name),
+		    pmcstat_string_unintern(sym->ps_name));
+
+	switch (elem->elem_type) {
+	case OCSD_GEN_TRC_ELEM_UNKNOWN:
+		break;
+	case OCSD_GEN_TRC_ELEM_NO_SYNC:
+		/* Trace off */
+		break;
+	case OCSD_GEN_TRC_ELEM_TRACE_ON:
+		break;
+	case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+		printf("range\n");
+		break;
+	case OCSD_GEN_TRC_ELEM_EXCEPTION:
+	case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+	case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+	case OCSD_GEN_TRC_ELEM_EO_TRACE:
+	case OCSD_GEN_TRC_ELEM_ADDR_NACC:
+	case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+	case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
+	case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
+	case OCSD_GEN_TRC_ELEM_EVENT:
+	case OCSD_GEN_TRC_ELEM_SWTRACE:
+	case OCSD_GEN_TRC_ELEM_CUSTOM:
+	default:
+		break;
+	};
+
+	return (resp);
+}
+
+static int
+cs_process_chunk(struct mtrace_data *mdata __unused, uint64_t base,
+    uint64_t start, uint64_t end)
+{
+	uint32_t bytes_done;
+	uint32_t block_size;
+	uint8_t *p_block;
+	int bytes_this_time;
+	int block_index;
+	int dp_ret;
+	int ret;
+
+	dprintf("%s: base %lx start %lx end %lx\n", __func__, base, start, end);
+
+	bytes_this_time = 0;
+	block_index = 0;
+	bytes_done = 0;
+	block_size = (end - start);
+	p_block = (uint8_t *)(base + start);
+
+	ret = OCSD_OK;
+	dp_ret = OCSD_RESP_CONT;
+
+	while (bytes_done < (uint32_t)block_size && (ret == OCSD_OK)) {
+
+		if (OCSD_DATA_RESP_IS_CONT(dp_ret)) {
+			dprintf("process data, block_size %d, bytes_done %d\n", block_size, bytes_done);
+			dp_ret = ocsd_dt_process_data(dcdtree_handle, OCSD_OP_DATA,
+			    block_index + bytes_done,
+			    block_size - bytes_done,
+			    ((uint8_t *)p_block) + bytes_done,
+			    &bytes_this_time);
+			bytes_done += bytes_this_time;
+			dprintf("BYTES DONE %d\n", bytes_done);
+		} else if (OCSD_DATA_RESP_IS_WAIT(dp_ret)) {
+			dp_ret = ocsd_dt_process_data(dcdtree_handle, OCSD_OP_FLUSH,
+			    0, 0, NULL, NULL);
+		} else {
+			ret = OCSD_ERR_DATA_DECODE_FATAL;
+		}
+	}
+
+	ocsd_dt_process_data(dcdtree_handle, OCSD_OP_EOT, 0, 0, NULL, NULL);
+
+	return (0);
+}
+
+static int
+cs_process(struct trace_cpu *tc, struct pmcstat_process *pp,
+    uint32_t cpu, uint32_t cycle, uint64_t offset)
+{
+	struct mtrace_data *mdata;
+
+	mdata = &tc->mdata;
+	mdata->pp = pp;
+
+	cs_init(tc);
+
+	dprintf("%s: cpu %d, cycle %d, tc->base %lx, tc->offset %lx, offset %lx, *tc->base %lx\n",
+	    __func__, cpu, cycle, (uint64_t)tc->base, (uint64_t)tc->offset, offset, *(uint64_t *)tc->base);
+
+	if (offset == tc->offset)
+		return (0);
+
+	if (cycle == tc->cycle) {
+		if (offset > tc->offset) {
+			cs_process_chunk(mdata, (uint64_t)tc->base, tc->offset, offset);
+			tc->offset = offset;
+		} else if (offset < tc->offset) {
+			err(EXIT_FAILURE, "cpu%d: offset already processed %lx %lx",
+			    cpu, offset, tc->offset);
+		}
+	} else if (cycle > tc->cycle) {
+		if ((cycle - tc->cycle) > 1)
+			err(EXIT_FAILURE, "cpu%d: trace buffers fills up faster than"
+			    " we can process it (%d/%d). Consider setting trace filters",
+			    cpu, cycle, tc->cycle);
+		cs_process_chunk(mdata, (uint64_t)tc->base, tc->offset, tc->bufsize);
+		tc->offset = 0;
+		tc->cycle += 1;
+	}
+
+	return (0);
+}
+
/*
 * Set up the OpenCSD decode pipeline for one trace CPU: error logging, a
 * frame-formatted decode tree, an ETMv4 decoder backed by the CPU's trace
 * buffer, the generic-element output callback, and optional raw-frame
 * printers.  Returns 0 on success, a negative value on failure.
 */
static int
cs_init(struct trace_cpu *tc)
{
	uint64_t start;
	uint64_t end;
	int ret;

	/*
	 * NOTE(review): the error logger is initialized twice; the second
	 * call passes severity 0 -- presumably to quiet the logger again,
	 * but confirm against the OpenCSD C API documentation.
	 */
	ocsd_def_errlog_init(OCSD_ERR_SEV_INFO, 1);
	ocsd_def_errlog_init(0, 0);

#if 0
	ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_FILE |
	    C_API_MSGLOGOUT_FLG_STDOUT, "c_api_test.log");
	if (ret != OCSD_OK)
		return (-1);
#endif

	/* Trace data arrives through the CoreSight frame formatter. */
	dcdtree_handle = ocsd_create_dcd_tree(OCSD_TRC_SRC_FRAME_FORMATTED,
	    OCSD_DFRMTR_FRAME_MEM_ALIGN);
	if(dcdtree_handle == C_API_INVALID_TREE_HANDLE) {
		printf("can't find dcd tree\n");
		return (-1);
	}

	/*
	 * NOTE(review): start/end are absolute addresses here, while
	 * cs_process_chunk() passes buffer-relative offsets to the same
	 * base+start arithmetic downstream -- verify the accessor window
	 * create_test_memory_acc() builds from these values is correct.
	 */
	start = (uint64_t)tc->base;
	end = (uint64_t)tc->base + tc->bufsize;

	ret = create_decoder_etmv4(dcdtree_handle, (uint64_t)tc->base, start, end);
	if (ret != OCSD_OK) {
		printf("can't create decoder: base %lx start %lx end %lx\n",
		    (uint64_t)tc->base, start, end);
		return (-2);
	}

#ifdef PMCTRACE_CS_DEBUG
	ocsd_tl_log_mapped_mem_ranges(dcdtree_handle);
#endif

	/* Either the library's stock element printer or our symbolizer. */
	if (cs_flags & FLAG_FORMAT)
		ocsd_dt_set_gen_elem_printer(dcdtree_handle);
	else
		ocsd_dt_set_gen_elem_outfn(dcdtree_handle, gen_trace_elem_print_lookup,
		    (const struct mtrace_data *)&tc->mdata);

	attach_raw_printers(dcdtree_handle);

	return (0);
}
+
+static int
+cs_option(int option)
+{
+
+	switch (option) {
+	case 't':
+		cs_flags |= FLAG_FORMAT;
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
/* ARM CoreSight trace-device method table, consumed by pmctrace core. */
struct trace_dev_methods cs_methods = {
	.init = cs_init,
	.process = cs_process,
	.option = cs_option,
};
Index: usr.sbin/pmctrace/pmctrace_pt.h
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/pmctrace_pt.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMCTRACE_PT_H_
+#define _PMCTRACE_PT_H_
+
+extern struct trace_dev_methods ipt_methods;
+
+#endif /* !_PMCTRACE_PT_H_ */
Index: usr.sbin/pmctrace/pmctrace_pt.c
===================================================================
--- /dev/null
+++ usr.sbin/pmctrace/pmctrace_pt.c
@@ -0,0 +1,371 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+ 
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <sys/ttycom.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <curses.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <kvm.h>
+#include <libgen.h>
+#include <limits.h>
+#include <math.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <gelf.h>
+#include <inttypes.h>
+
+#include <libpmcstat.h>
+
+#include "pmctrace.h"
+#include "pmctrace_pt.h"
+
+#include <libipt/pt_cpu.h>
+#include <libipt/pt_last_ip.h>
+#include <libipt/pt_time.h>
+#include <libipt/pt_compiler.h>
+#include <libipt/intel-pt.h>
+
+#define	PMCTRACE_PT_DEBUG
+#undef	PMCTRACE_PT_DEBUG
+
+#ifdef	PMCTRACE_PT_DEBUG
+#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
+#else
+#define	dprintf(fmt, ...)
+#endif
+
+static int ipt_flags;
+#define	FLAG_BRANCH_TNT		(1 << 0)	/* Taken/Not Taken */
+
+static struct pmcstat_symbol *
+symbol_lookup(struct mtrace_data *mdata)
+{
+	struct pmcstat_image *image;
+	struct pmcstat_symbol *sym;
+	struct pmcstat_pcmap *map;
+	uint64_t newpc;
+	uint64_t ip;
+
+	if (mdata->ip & (1UL << 47))
+		ip = mdata->ip | 0xffffUL << 48;
+	else
+		ip = mdata->ip;
+
+	map = pmcstat_process_find_map(mdata->pp, ip);
+	if (map != NULL) {
+		image = map->ppm_image;
+		newpc = ip - (map->ppm_lowpc +
+			(image->pi_vaddr - image->pi_start));
+		sym = pmcstat_symbol_search(image, newpc);
+		return (sym);
+	} else
+		dprintf("cpu%d: 0x%lx map not found\n", mdata->cpu, ip);
+
+	return (NULL);
+}
+
+static int
+print_tnt_payload(struct mtrace_data *mdata, uint64_t offset __unused,
+    const struct pt_packet_tnt *packet)
+{
+	char payload[48];
+	uint64_t tnt;
+	uint8_t bits;
+	char *begin;
+	char *end;
+
+	bits = packet->bit_size;
+	tnt = packet->payload;
+	begin = &payload[0];
+	end = begin + bits;
+
+	if (sizeof(payload) < bits)
+		end = begin + sizeof(payload);
+
+	for (; begin < end; ++begin, --bits)
+		*begin = tnt & (1ull << (bits - 1)) ? '!' : '.';
+
+	printf("cpu%d: TNT %s\n", mdata->cpu, payload);
+
+	return (0);
+}
+
+static int
+print_ip_payload(struct mtrace_data *mdata, uint64_t offset __unused,
+    const struct pt_packet_ip *packet)
+{
+	struct pmcstat_symbol *sym;
+
+	switch (packet->ipc) {
+	case pt_ipc_suppressed:
+		break;
+	case pt_ipc_update_16:
+		mdata->ip &= ~0xffffUL;
+		mdata->ip |= (packet->ip & 0xffffUL);
+		break;
+	case pt_ipc_update_32:
+		mdata->ip &= ~0xffffffffUL;
+		mdata->ip |= (packet->ip & 0xffffffffUL);
+		break;
+	case pt_ipc_update_48:
+		mdata->ip &= ~0xffffffffffffUL;
+		mdata->ip |= (packet->ip & 0xffffffffffffUL);
+		break;
+	case pt_ipc_sext_48:
+		mdata->ip &= ~0xffffffffffffUL;
+		mdata->ip |= (packet->ip & 0xffffffffffffUL);
+		symbol_lookup(mdata);
+	case pt_ipc_full:
+		mdata->ip = packet->ip;
+		break;
+	default:
+		printf("unknown ipc: %d\n", packet->ipc);
+		return (0);
+	}
+
+	sym = symbol_lookup(mdata);
+	if (sym) {
+		printf("cpu%d:  IP 0x%lx %s\n", mdata->cpu, mdata->ip,
+		    pmcstat_string_unintern(sym->ps_name));
+	} else
+		dprintf("cpu%d: 0x%lx not found\n", mdata->cpu, mdata->ip);
+
+	return (0);
+}
+
+static int
+dump_packets(struct mtrace_data *mdata, struct pt_packet_decoder *decoder,
+    const struct pt_config *config __unused)
+{
+	struct pt_packet packet;
+	uint64_t offset;
+	int error;
+
+	dprintf("%s\n", __func__);
+
+	while (1) {
+		error = pt_pkt_get_offset(decoder, &offset);
+		if (error < 0)
+			errx(EX_SOFTWARE, "ERROR: can't get offset, err %d\n", error);
+
+		error = pt_pkt_next(decoder, &packet, sizeof(packet));
+		if (error < 0) {
+			dprintf("%s: error %d\n", __func__, error);
+			break;
+		}
+
+		switch (packet.type) {
+		case ppt_invalid:
+		case ppt_unknown:
+		case ppt_pad:
+		case ppt_psb:
+		case ppt_psbend:
+			break;
+		case ppt_fup:
+		case ppt_tip:
+		case ppt_tip_pge:
+		case ppt_tip_pgd:
+			print_ip_payload(mdata, offset, &packet.payload.ip);
+			break;
+		case ppt_tnt_8:
+		case ppt_tnt_64:
+			if (ipt_flags & FLAG_BRANCH_TNT)
+				print_tnt_payload(mdata, offset, &packet.payload.tnt);
+			break;
+		case ppt_mode:
+		case ppt_pip:
+		case ppt_vmcs:
+		case ppt_cbr:
+			break;
+		case ppt_tsc:
+			printf("cpu%d: TSC %ld\n", mdata->cpu, packet.payload.tsc.tsc);
+			break;
+		case ppt_tma:
+			break;
+		case ppt_mtc:
+			printf("cpu%d: MTC %x\n", mdata->cpu, packet.payload.mtc.ctc);
+			break;
+		case ppt_cyc:
+		case ppt_stop:
+		case ppt_ovf:
+		case ppt_mnt:
+		case ppt_exstop:
+		case ppt_mwait:
+		case ppt_pwre:
+		case ppt_pwrx:
+		case ppt_ptw:
+		default:
+			break;
+		}
+	}
+
+	return (0);
+}
+
+static int
+ipt_process_chunk(struct mtrace_data *mdata, uint64_t base,
+    uint64_t start, uint64_t end)
+{
+	struct pt_packet_decoder *decoder;
+	struct pt_config config;
+	int error;
+
+	dprintf("%s\n", __func__);
+
+	memset(&config, 0, sizeof(config));
+	pt_config_init(&config);
+
+	error = pt_cpu_read(&config.cpu);
+	if (error < 0)
+		errx(EX_SOFTWARE, "ERROR: pt_cpu_read failed, err %d\n", error);
+	error = pt_cpu_errata(&config.errata, &config.cpu);
+	if (error < 0)
+		errx(EX_SOFTWARE, "ERROR: can't get errata, err %d\n", error);
+
+	config.begin = (uint8_t *)(base + start);
+	config.end = (uint8_t *)(base + end);
+
+	dprintf("%s: begin %lx end %lx\n", __func__,
+	    (uint64_t)config.begin, (uint64_t)config.end);
+
+	decoder = pt_pkt_alloc_decoder(&config);
+	if (decoder == NULL) {
+		printf("Can't allocate decoder\n");
+		return (-1);
+	}
+
+	error = pt_pkt_sync_set(decoder, 0ull);
+	if (error < 0)
+		errx(EX_SOFTWARE, "ERROR: sync_set failed, err %d\n", error);
+	error = pt_pkt_sync_forward(decoder);
+	if (error < 0 && error != -pte_eos)
+		errx(EX_SOFTWARE, "ERROR: sync_forward failed, err %d\n", error);
+
+	while (1) {
+		error = dump_packets(mdata, decoder, &config);
+		if (error == 0)
+			break;
+
+		error = pt_pkt_sync_forward(decoder);
+		if (error < 0) {
+			if (error == -pte_eos)
+				return (0);
+		}
+	}
+
+	return (0);
+}
+
+static int
+ipt_process(struct trace_cpu *tc, struct pmcstat_process *pp,
+    uint32_t cpu, uint32_t cycle, uint64_t offset)
+{
+	struct mtrace_data *mdata;
+
+	mdata = &tc->mdata;
+	mdata->pp = pp;
+
+	dprintf("%s: cpu %d, cycle %d, offset %ld\n",
+	    __func__, cpu, cycle, offset);
+
+	if (offset == tc->offset)
+		return (0);
+
+	if (cycle == tc->cycle) {
+		if (offset > tc->offset) {
+			ipt_process_chunk(mdata, (uint64_t)tc->base, tc->offset, offset);
+			tc->offset = offset;
+		} else if (offset < tc->offset) {
+			err(EXIT_FAILURE, "cpu%d: offset already processed %lx %lx",
+			    cpu, offset, tc->offset);
+		}
+	} else if (cycle > tc->cycle) {
+		if ((cycle - tc->cycle) > 1)
+			err(EXIT_FAILURE, "cpu%d: trace buffers fills up faster than"
+			    " we can process it (%d/%d). Consider setting trace filters",
+			    cpu, cycle, tc->cycle);
+		ipt_process_chunk(mdata, (uint64_t)tc->base, tc->offset, tc->bufsize);
+		tc->offset = 0;
+		tc->cycle += 1;
+		ipt_process_chunk(mdata, (uint64_t)tc->base, tc->offset, offset);
+		tc->offset = offset;
+	}
+
+	return (0);
+}
+
+static int
+ipt_option(int option)
+{
+
+	switch (option) {
+	case 't':
+		/* Decode 'Taken/Not_Taken branch' packet. */
+		ipt_flags |= FLAG_BRANCH_TNT;
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
/*
 * Intel PT trace-device method table, consumed by pmctrace core.
 * NOTE(review): unlike cs_methods, no .init callback is set -- confirm
 * the caller tolerates a NULL init hook.
 */
struct trace_dev_methods ipt_methods = {
	.process = ipt_process,
	.option = ipt_option,
};