Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- sys/dev/hwpmc/hwpmc_mod.c
+++ sys/dev/hwpmc/hwpmc_mod.c
@@ -78,6 +78,7 @@
 #include <vm/vm_object.h>
 
 #include "hwpmc_soft.h"
+#include "hwpmc_vm.h"
 
 #define PMC_EPOCH_ENTER() struct epoch_tracker pmc_et; epoch_enter_preempt(global_epoch_preempt, &pmc_et)
 #define PMC_EPOCH_EXIT() epoch_exit_preempt(global_epoch_preempt, &pmc_et)
@@ -104,6 +105,7 @@
 #define	PMC_PCPU_SAVED(C,R)	pmc_pcpu_saved[(R) + md->pmd_npmc*(C)]
 
 struct mtx_pool		*pmc_mtxpool;
+struct mtx_pool		*pmc_mtxpool_sleep;
 static int		*pmc_pmcdisp;	 /* PMC row dispositions */
 
 #define	PMC_ROW_DISP_IS_FREE(R)		(pmc_pmcdisp[(R)] == 0)
@@ -1467,6 +1469,8 @@
 			    ("[pmc,%d] pmcval outside of expected range cpu=%d "
 			    "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__,
 			    cpu, ri, newvalue, pm->pm_sc.pm_reloadcount));
+		} else if (PMC_TO_MODE(pm) == PMC_MODE_TT) {
+			/* Nothing */
 		} else {
 			KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
 			    ("[pmc,%d] illegal mode=%d", __LINE__,
@@ -1482,7 +1486,8 @@
 		pcd->pcd_write_pmc(cpu, adjri, newvalue);
 
 		/* If a sampling mode PMC, reset stalled state. */
-		if (PMC_TO_MODE(pm) == PMC_MODE_TS)
+		if (PMC_TO_MODE(pm) == PMC_MODE_TS ||
+		    PMC_TO_MODE(pm) == PMC_MODE_TT)
 			pm->pm_pcpu_state[cpu].pps_stalled = 0;
 
 		/* Indicate that we desire this to run. */
@@ -1663,6 +1668,8 @@
 						    pm->pm_sc.pm_reloadcount;
 				}
 				mtx_pool_unlock_spin(pmc_mtxpool, pm);
+			} else if (mode == PMC_MODE_TT) {
+				/* Nothing */
 			} else {
 				tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
 
@@ -1755,8 +1762,11 @@
 	char *fullpath, *freepath;
 	const struct pmc *pm;
 	struct pmc_owner *po;
-	const struct pmc_process *pp;
+	struct pmc_process *pp;
+	struct proc *p;
+	bool pause_thread;
 
+	pause_thread = false;
 	freepath = fullpath = NULL;
 	MPASS(!in_epoch(global_epoch_preempt));
 	pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
@@ -1772,19 +1782,43 @@
 	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 		goto done;
 
+	p = td->td_proc;
+	if ((p->p_flag & P_HWPMC) == 0)
+		goto done;
+
 	/*
 	 * Inform sampling PMC owners tracking this process.
 	 */
-	for (ri = 0; ri < md->pmd_npmc; ri++)
-		if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
-		    PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+	for (ri = 0; ri < md->pmd_npmc; ri++) {
+		if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+			continue;
+		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+		    PMC_TO_MODE(pm) == PMC_MODE_TT)
 			pmclog_process_map_in(pm->pm_owner,
 			    pid, pkm->pm_address, fullpath);
+		if (PMC_TO_MODE(pm) == PMC_MODE_TT)
+			pause_thread = true;
+	}
 
   done:
 	if (freepath)
 		free(freepath, M_TEMP);
 	PMC_EPOCH_EXIT();
+
+	/*
+	 * An mmap pmclog entry was just scheduled to ship to userspace but
+	 * has not yet been received by the tracing application.  Pause this
+	 * thread until the user process consumes the entry, reconfigures its
+	 * tracing filters and wakes us up (PMC_OP_THREAD_WAKEUP or pmc
+	 * release).  NOTE(review): a wakeup() arriving before the msleep()
+	 * below is lost; a predicate flag checked under pp_tslock is needed.
+	 */
+	if (pp != NULL && pause_thread == true) {
+		pp->pp_refcnt++;
+		mtx_lock(pp->pp_tslock);
+		msleep(pp, pp->pp_tslock, PWAIT, "pmc-mmap", 0);
+		mtx_unlock(pp->pp_tslock);
+		pp->pp_refcnt--;
+	}
 }
 
 
@@ -1813,11 +1847,14 @@
 	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 		return;
 
-	for (ri = 0; ri < md->pmd_npmc; ri++)
-		if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
-		    PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+	for (ri = 0; ri < md->pmd_npmc; ri++) {
+		if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+			continue;
+		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+		    PMC_TO_MODE(pm) == PMC_MODE_TT)
 			pmclog_process_map_out(pm->pm_owner, pid,
 			    pkm->pm_address, pkm->pm_address + pkm->pm_size);
+	}
 }
 
 /*
@@ -1831,7 +1868,8 @@
 	struct pmckern_map_in *km, *kmbase;
 
 	MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
-	KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
+	KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+	    PMC_TO_MODE(pm) == PMC_MODE_ST,
 	    ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
 		__LINE__, (void *) pm));
 
@@ -2594,8 +2632,8 @@
 
 	mtx_lock_spin(&pmc_processhash_mtx);
 	LIST_FOREACH(pp, pph, pp_next)
-	    if (pp->pp_proc == p)
-		    break;
+		if (pp->pp_proc == p)
+			break;
 
 	if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
 		LIST_REMOVE(pp, pp_next);
@@ -2605,6 +2643,7 @@
 		ppnew->pp_proc = p;
 		LIST_INIT(&ppnew->pp_tds);
 		ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew);
+		ppnew->pp_tslock = mtx_pool_find(pmc_mtxpool_sleep, ppnew);
 		LIST_INSERT_HEAD(pph, ppnew, pp_next);
 		mtx_unlock_spin(&pmc_processhash_mtx);
 		pp = ppnew;
@@ -2873,6 +2912,15 @@
 		 */
 		LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) {
 			pp = ptgt->pt_process;
+
+			/*
+			 * Owner of this pmc descriptor is terminated.
+			 * If any tracing target thread that associated with
+			 * this pmc descriptor is sleeping, then release it
+			 * from sleeping.
+			 */
+			wakeup(pp);
+
 			pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */
 
 			PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt);
@@ -3160,7 +3208,8 @@
 	 * If this is a sampling mode PMC, log mapping information for
 	 * the kernel modules that are currently loaded.
 	 */
-	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+	    PMC_TO_MODE(pm) == PMC_MODE_ST)
 	    pmc_log_kernel_mappings(pm);
 
 	if (PMC_IS_VIRTUAL_MODE(mode)) {
@@ -3835,9 +3884,14 @@
 		mode = pa.pm_mode;
 		cpu  = pa.pm_cpu;
 
-		if ((mode != PMC_MODE_SS  &&  mode != PMC_MODE_SC  &&
-		     mode != PMC_MODE_TS  &&  mode != PMC_MODE_TC) ||
-		    (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
+		if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
+		    mode != PMC_MODE_SC && mode != PMC_MODE_TC &&
+		    mode != PMC_MODE_ST && mode != PMC_MODE_TT) {
+			error = EINVAL;
+			break;
+		}
+
+		if (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max()) {
 			error = EINVAL;
 			break;
 		}
@@ -4312,6 +4366,176 @@
 	}
 	break;
 
+	case PMC_OP_LOG_KERNEL_MAP:
+	{
+		struct pmc_op_simple sp;
+		struct pmc *pm;
+
+		if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
+			break;
+
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(sp.pm_pmcid, &pm)) != 0)
+			break;
+
+		if (PMC_TO_MODE(pm) != PMC_MODE_ST)
+			break;
+
+		if (pm->pm_state != PMC_STATE_ALLOCATED &&
+		    pm->pm_state != PMC_STATE_STOPPED &&
+		    pm->pm_state != PMC_STATE_RUNNING) {
+			error = EINVAL;
+			break;
+		}
+
+		pmc_log_kernel_mappings(pm);
+	}
+	break;
+
+	case PMC_OP_THREAD_WAKEUP:
+	{
+		struct pmc_op_thread_wakeup u;
+		struct pmc_process *pp;
+		struct proc *p;
+		struct pmc *pm;
+
+		if ((error = copyin(arg, &u, sizeof(u))) != 0)
+			break;
+
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(u.pm_pmcid, &pm)) != 0)
+			break;
+
+		/* lookup pid; pfind() returns the process locked */
+		if ((p = pfind(u.pm_pid)) == NULL) {
+			error = ESRCH;
+			break;
+		}
+
+		/* Wake a tracing thread parked in pmc_process_mmap(). */
+		if ((p->p_flag & P_HWPMC) != 0 &&
+		    (pp = pmc_find_process_descriptor(p, 0)) != NULL)
+			wakeup(pp);
+
+		/* Release the proc lock acquired by pfind() on all paths. */
+		PROC_UNLOCK(p);
+	}
+	break;
+
+	case PMC_OP_TRACE_CONFIG:
+	{
+		struct pmc_op_trace_config trc;
+		uint64_t *ranges;
+		struct pmc *pm;
+		struct pmc_binding pb;
+		struct pmc_classdep *pcd;
+		uint32_t nranges;
+		uint32_t cpu;
+		uint32_t ri;
+		int adjri;
+
+		if ((error = copyin(arg, &trc, sizeof(trc))) != 0)
+			break;
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(trc.pm_pmcid, &pm)) != 0)
+			break;
+		if (PMC_TO_MODE(pm) != PMC_MODE_ST &&
+		    PMC_TO_MODE(pm) != PMC_MODE_TT)
+			break;
+
+		/* Can't proceed with PMC that hasn't been started. */
+		if (pm->pm_state != PMC_STATE_ALLOCATED &&
+		    pm->pm_state != PMC_STATE_STOPPED &&
+		    pm->pm_state != PMC_STATE_RUNNING) {
+			error = EINVAL;
+			break;
+		}
+
+		/* Validate userland-supplied CPU and range count. */
+		cpu = trc.pm_cpu;
+		nranges = trc.nranges;
+		ranges = trc.ranges;
+		if (cpu >= pmc_cpu_max() || !pmc_cpu_is_active(cpu) ||
+		    nranges > PMC_FILTER_MAX_IP_RANGES) {
+			error = EINVAL;
+			break;
+		}
+
+		ri = PMC_TO_ROWINDEX(pm);
+		pcd = pmc_ri_to_classdep(md, ri, &adjri);
+		if (pcd->pcd_trace_config == NULL)
+			break;
+
+		/* switch to CPU 'cpu' */
+		pmc_save_cpu_binding(&pb);
+		pmc_select_cpu(cpu);
+		mtx_pool_lock_spin(pmc_mtxpool, pm);
+		error = (*pcd->pcd_trace_config)(cpu, adjri, pm, ranges, nranges);
+		mtx_pool_unlock_spin(pmc_mtxpool, pm);
+		pmc_restore_cpu_binding(&pb);
+	}
+	break;
+
+	/*
+	 * Read a PMC trace buffer ptr.
+	 */
+	case PMC_OP_TRACE_READ:
+	{
+		struct pmc_op_trace_read trr;
+		struct pmc_op_trace_read *trr_ret;
+		struct pmc_binding pb;
+		struct pmc_classdep *pcd;
+		struct pmc *pm;
+		pmc_value_t cycle;
+		pmc_value_t offset;
+		uint32_t cpu;
+		uint32_t ri;
+		int adjri;
+
+		if ((error = copyin(arg, &trr, sizeof(trr))) != 0)
+			break;
+		/* locate pmc descriptor */
+		if ((error = pmc_find_pmc(trr.pm_pmcid, &pm)) != 0)
+			break;
+		if (PMC_TO_MODE(pm) != PMC_MODE_ST &&
+		    PMC_TO_MODE(pm) != PMC_MODE_TT)
+			break;
+
+		/* Can't read a PMC that hasn't been started. */
+		if (pm->pm_state != PMC_STATE_ALLOCATED &&
+		    pm->pm_state != PMC_STATE_STOPPED &&
+		    pm->pm_state != PMC_STATE_RUNNING) {
+			error = EINVAL;
+			break;
+		}
+		/* Validate the userland-supplied CPU number. */
+		cpu = trr.pm_cpu;
+		if (cpu >= pmc_cpu_max() || !pmc_cpu_is_active(cpu)) {
+			error = EINVAL;
+			break;
+		}
+		ri = PMC_TO_ROWINDEX(pm);
+		pcd = pmc_ri_to_classdep(md, ri, &adjri);
+		if (pcd->pcd_read_trace == NULL)
+			break;
+		/* switch to CPU 'cpu' */
+		pmc_save_cpu_binding(&pb);
+		pmc_select_cpu(cpu);
+		mtx_pool_lock_spin(pmc_mtxpool, pm);
+		error = (*pcd->pcd_read_trace)(cpu, adjri, pm, &cycle, &offset);
+		mtx_pool_unlock_spin(pmc_mtxpool, pm);
+		pmc_restore_cpu_binding(&pb);
+
+		/* Only export results when the read succeeded. */
+		if (error == 0) {
+			trr_ret = (struct pmc_op_trace_read *)arg;
+			error = copyout(&cycle, &trr_ret->pm_cycle,
+			    sizeof(trr.pm_cycle));
+		}
+		if (error == 0)
+			error = copyout(&offset, &trr_ret->pm_offset,
+			    sizeof(trr.pm_offset));
+	}
+	break;
 
 	/*
 	 * Read and/or write a PMC.
@@ -4415,7 +4639,7 @@
 			/* save old value */
 			if (prw.pm_flags & PMC_F_OLDVALUE)
 				if ((error = (*pcd->pcd_read_pmc)(cpu, adjri,
-					 &oldvalue)))
+				    &oldvalue)))
 					goto error;
 			/* write out new value */
 			if (prw.pm_flags & PMC_F_NEWVALUE)
@@ -5585,6 +5809,12 @@
 			return (ENOSYS);
         }
 
+	error = pmc_vm_initialize(md);
+	if (error) {
+		pmc_md_finalize(md);
+		return (error);
+	}
+
 	KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1,
 	    ("[pmc,%d] no classes or pmcs", __LINE__));
 
@@ -5705,6 +5935,8 @@
 	/* allocate a pool of spin mutexes */
 	pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size,
 	    MTX_SPIN);
+	pmc_mtxpool_sleep = mtx_pool_create("pmc-sleep", pmc_mtxpool_size,
+	    MTX_DEF);
 
 	PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx "
 	    "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
@@ -5822,6 +6054,8 @@
 
 	if (pmc_mtxpool)
 		mtx_pool_destroy(&pmc_mtxpool);
+	if (pmc_mtxpool_sleep)
+		mtx_pool_destroy(&pmc_mtxpool_sleep);
 
 	mtx_destroy(&pmc_processhash_mtx);
 	taskqgroup_config_gtask_deinit(&free_gtask);
@@ -5919,6 +6153,8 @@
 	}
 
 	pmclog_shutdown();
+	pmc_vm_finalize();
+
 	counter_u64_free(pmc_stats.pm_intr_ignored);
 	counter_u64_free(pmc_stats.pm_intr_processed);
 	counter_u64_free(pmc_stats.pm_intr_bufferfull);
Index: sys/sys/pmc.h
===================================================================
--- sys/sys/pmc.h
+++ sys/sys/pmc.h
@@ -110,6 +110,7 @@
 	__PMC_CPU(INTEL_BROADWELL_XEON, 0x97,   "Intel Broadwell Xeon") \
 	__PMC_CPU(INTEL_SKYLAKE, 0x98,   "Intel Skylake")		\
 	__PMC_CPU(INTEL_SKYLAKE_XEON, 0x99,   "Intel Skylake Xeon")	\
+	__PMC_CPU(INTEL_KABYLAKE, 0x9A,   "Intel Kabylake")	\
 	__PMC_CPU(INTEL_XSCALE,	0x100,	"Intel XScale")		\
 	__PMC_CPU(MIPS_24K,     0x200,  "MIPS 24K")		\
 	__PMC_CPU(MIPS_OCTEON,  0x201,  "Cavium Octeon")	\
@@ -162,7 +163,9 @@
 	__PMC_CLASS(ARMV8,	0x11,	"ARMv8")			\
 	__PMC_CLASS(MIPS74K,	0x12,	"MIPS 74K")			\
 	__PMC_CLASS(E500,	0x13,	"Freescale e500 class")		\
-	__PMC_CLASS(BERI,	0x14,	"MIPS BERI")
+	__PMC_CLASS(BERI,	0x14,	"MIPS BERI")			\
+	__PMC_CLASS(PT,		0x15,	"Intel PT")			\
+	__PMC_CLASS(CORESIGHT,	0x16,	"ARM Coresight")
 
 enum pmc_class {
 #undef  __PMC_CLASS
@@ -171,7 +174,7 @@
 };
 
 #define	PMC_CLASS_FIRST	PMC_CLASS_TSC
-#define	PMC_CLASS_LAST	PMC_CLASS_E500
+#define	PMC_CLASS_LAST	PMC_CLASS_CORESIGHT
 
 /*
  * A PMC can be in the following states:
@@ -242,7 +245,9 @@
 	__PMC_MODE(SS,	0)			\
 	__PMC_MODE(SC,	1)			\
 	__PMC_MODE(TS,	2)			\
-	__PMC_MODE(TC,	3)
+	__PMC_MODE(TC,	3)			\
+	__PMC_MODE(ST,	4)			\
+	__PMC_MODE(TT,	5)
 
 enum pmc_mode {
 #undef	__PMC_MODE
@@ -256,11 +261,11 @@
 #define	PMC_IS_COUNTING_MODE(mode)				\
 	((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC)
 #define	PMC_IS_SYSTEM_MODE(mode)				\
-	((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC)
+	((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC || (mode) == PMC_MODE_ST)
 #define	PMC_IS_SAMPLING_MODE(mode)				\
 	((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS)
 #define	PMC_IS_VIRTUAL_MODE(mode)				\
-	((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC)
+	((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC || (mode) == PMC_MODE_TT)
 
 /*
  * PMC row disposition
@@ -352,7 +357,11 @@
 	__PMC_OP(PMCSTOP, "Stop a PMC")					\
 	__PMC_OP(WRITELOG, "Write a cookie to the log file")		\
 	__PMC_OP(CLOSELOG, "Close log file")				\
-	__PMC_OP(GETDYNEVENTINFO, "Get dynamic events list")
+	__PMC_OP(GETDYNEVENTINFO, "Get dynamic events list")		\
+	__PMC_OP(LOG_KERNEL_MAP, "Log kernel mappings")			\
+	__PMC_OP(THREAD_WAKEUP, "Thread wakeup")			\
+	__PMC_OP(TRACE_READ, "Read trace buffer pointer")		\
+	__PMC_OP(TRACE_CONFIG, "Setup trace IP ranges")
 
 
 enum pmc_ops {
@@ -500,7 +509,6 @@
 	pmc_value_t	pm_value;	/* new&returned value */
 };
 
-
 /*
  * OP GETPMCINFO
  *
@@ -526,6 +534,40 @@
 	struct pmc_info	pm_pmcs[];	/* space for 'npmc' structures */
 };
 
+/*
+ * OP THREAD_WAKEUP
+ *
+ * Wake up a traced thread paused in pmc_process_mmap().
+ */
+
+struct pmc_op_thread_wakeup {
+	pmc_id_t	pm_pmcid;	/* tracing PMC identifier */
+	pid_t		pm_pid;		/* pid of the paused target process */
+};
+
+/*
+ * OP TRACE_CONFIG
+ */
+
+#define	PMC_FILTER_MAX_IP_RANGES	4
+
+struct pmc_op_trace_config {
+	pmc_id_t	pm_pmcid;
+	uint32_t	pm_cpu;		/* CPU number or PMC_CPU_ANY */
+	uint64_t	ranges[2 * PMC_FILTER_MAX_IP_RANGES];
+	uint32_t	nranges;
+};
+
+/*
+ * OP TRACE_READ
+ */
+
+struct pmc_op_trace_read {
+	pmc_id_t	pm_pmcid;
+	uint32_t	pm_cpu;
+	pmc_value_t	pm_cycle;	/* returned value */
+	pmc_value_t	pm_offset;	/* returned value */
+};
 
 /*
  * OP GETCPUINFO
@@ -533,7 +575,6 @@
  * Retrieve system CPU information.
  */
 
-
 struct pmc_classinfo {
 	enum pmc_class	pm_class;	/* class id */
 	uint32_t	pm_caps;	/* counter capabilities */
@@ -843,6 +884,7 @@
 	LIST_ENTRY(pmc_process) pp_next;	/* hash chain */
 	LIST_HEAD(,pmc_thread) pp_tds;		/* list of threads */
 	struct mtx	*pp_tdslock;		/* lock on pp_tds thread list */
+	struct mtx	*pp_tslock;		/* thread sleep lock */
 	int		pp_refcnt;		/* reference count */
 	uint32_t	pp_flags;		/* flags PMC_PP_* */
 	struct proc	*pp_proc;		/* target process */
@@ -1019,6 +1061,12 @@
 	int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value);
 	int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value);
 
+	/* tracing */
+	int (*pcd_read_trace)(int _cpu, int _ri, struct pmc *_pm,
+	    pmc_value_t *_cycle, pmc_value_t *_offset);
+	int (*pcd_trace_config)(int _cpu, int _ri, struct pmc *_pm,
+	    uint64_t *ranges, uint32_t nranges);
+
 	/* pmc allocation/release */
 	int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t,
 		const struct pmc_op_pmcallocate *_a);
@@ -1046,7 +1094,7 @@
  * Machine dependent bits needed per CPU type.
  */
 
-struct pmc_mdep  {
+struct pmc_mdep {
 	uint32_t	pmd_cputype;    /* from enum pmc_cputype */
 	uint32_t	pmd_npmc;	/* number of PMCs per CPU */
 	uint32_t	pmd_nclass;	/* number of PMC classes present */