D12875.id38483.diff
Index: lib/libpmc/libpmc.c
===================================================================
--- lib/libpmc/libpmc.c
+++ lib/libpmc/libpmc.c
@@ -76,6 +76,10 @@
static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
#endif
+#if defined(__amd64__)
+static int pt_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+ struct pmc_op_pmcallocate *_pmc_config);
+#endif
#if defined(__arm__)
#if defined(__XSCALE__)
static int xscale_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
@@ -239,6 +243,12 @@
__PMC_EV_ALIAS_SKYLAKE_XEON()
};
+static const struct pmc_event_descr kabylake_event_table[] =
+{
+ /* Kabylake events are similar to Skylake */
+ __PMC_EV_ALIAS_SKYLAKE()
+};
+
static const struct pmc_event_descr ivybridge_event_table[] =
{
__PMC_EV_ALIAS_IVYBRIDGE()
@@ -336,6 +346,7 @@
PMC_MDEP_TABLE(broadwell_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
PMC_MDEP_TABLE(skylake, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
PMC_MDEP_TABLE(skylake_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(kabylake, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP, PMC_CLASS_PT);
PMC_MDEP_TABLE(ivybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(ivybridge_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(sandybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
@@ -365,6 +376,11 @@
__PMC_EV_TSC()
};
+static const struct pmc_event_descr pt_event_table[] =
+{
+ __PMC_EV_PT()
+};
+
#undef PMC_CLASS_TABLE_DESC
#define PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR) \
static const struct pmc_class_descr NAME##_class_table_descr = \
@@ -392,6 +408,7 @@
PMC_CLASS_TABLE_DESC(broadwell_xeon, IAP, broadwell_xeon, iap);
PMC_CLASS_TABLE_DESC(skylake, IAP, skylake, iap);
PMC_CLASS_TABLE_DESC(skylake_xeon, IAP, skylake_xeon, iap);
+PMC_CLASS_TABLE_DESC(kabylake, IAP, kabylake, iap);
PMC_CLASS_TABLE_DESC(ivybridge, IAP, ivybridge, iap);
PMC_CLASS_TABLE_DESC(ivybridge_xeon, IAP, ivybridge_xeon, iap);
PMC_CLASS_TABLE_DESC(sandybridge, IAP, sandybridge, iap);
@@ -419,6 +436,9 @@
#if defined(__i386__) || defined(__amd64__)
PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc);
#endif
+#if defined(__amd64__)
+PMC_CLASS_TABLE_DESC(pt, PT, pt, pt);
+#endif
#if defined(__arm__)
#if defined(__XSCALE__)
PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale);
@@ -732,6 +752,8 @@
#define skylake_aliases_without_iaf core2_aliases_without_iaf
#define skylake_xeon_aliases core2_aliases
#define skylake_xeon_aliases_without_iaf core2_aliases_without_iaf
+#define kabylake_aliases core2_aliases
+#define kabylake_aliases_without_iaf core2_aliases_without_iaf
#define ivybridge_aliases core2_aliases
#define ivybridge_aliases_without_iaf core2_aliases_without_iaf
#define ivybridge_xeon_aliases core2_aliases
@@ -1049,7 +1071,8 @@
return (-1);
} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_SKYLAKE ||
- cpu_info.pm_cputype == PMC_CPU_INTEL_SKYLAKE_XEON) {
+ cpu_info.pm_cputype == PMC_CPU_INTEL_SKYLAKE_XEON ||
+ cpu_info.pm_cputype == PMC_CPU_INTEL_KABYLAKE) {
if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
n = pmc_parse_mask(iap_rsp_mask_skylake, p, &rsp);
} else
@@ -2495,6 +2518,84 @@
}
#endif
+#if defined(__amd64__)
+
+#define INTEL_PT_KW_BRANCHES "branches"
+#define INTEL_PT_KW_TSC "tsc"
+#define INTEL_PT_KW_MTC "mtc"
+#define INTEL_PT_KW_DISRETC "disretc"
+#define INTEL_PT_KW_ADDRA "addra"
+#define INTEL_PT_KW_ADDRB "addrb"
+
+static int
+pt_allocate_pmc(enum pmc_event pe, char *ctrspec,
+ struct pmc_op_pmcallocate *pmc_config)
+{
+ struct pmc_md_pt_op_pmcallocate *pm_pt;
+ uint64_t addr;
+ uint32_t addrn;
+ char *p, *q, *e;
+
+ if (pe != PMC_EV_PT_PT)
+ return (-1);
+
+ pm_pt = (struct pmc_md_pt_op_pmcallocate *)&pmc_config->pm_md.pm_pt;
+
+ addrn = 0;
+ while ((p = strsep(&ctrspec, ",")) != NULL) {
+ if (KWMATCH(p, INTEL_PT_KW_BRANCHES)) {
+ pm_pt->flags |= INTEL_PT_FLAG_BRANCHES;
+ }
+
+ if (KWMATCH(p, INTEL_PT_KW_TSC)) {
+ pm_pt->flags |= INTEL_PT_FLAG_TSC;
+ }
+
+ if (KWMATCH(p, INTEL_PT_KW_MTC)) {
+ pm_pt->flags |= INTEL_PT_FLAG_MTC;
+ }
+
+ if (KWMATCH(p, INTEL_PT_KW_DISRETC)) {
+ pm_pt->flags |= INTEL_PT_FLAG_DISRETC;
+ }
+
+ if (KWPREFIXMATCH(p, INTEL_PT_KW_ADDRA "=")) {
+ if (addrn >= PT_NADDR) /* all address-filter slots in use */
+ return (-1);
+ q = strchr(p, '=');
+ if (*++q == '\0') /* skip '=' */
+ return (-1);
+
+ addr = strtoul(q, &e, 0);
+ if (e == q || *e != '\0')
+ return (-1);
+ pm_pt->ranges[addrn * 2] = addr;
+ }
+
+ if (KWPREFIXMATCH(p, INTEL_PT_KW_ADDRB "=")) {
+ if (addrn >= PT_NADDR) /* all address-filter slots in use */
+ return (-1);
+ q = strchr(p, '=');
+ if (*++q == '\0') /* skip '=' */
+ return (-1);
+
+ addr = strtoul(q, &e, 0);
+ if (e == q || *e != '\0')
+ return (-1);
+ pm_pt->ranges[addrn * 2 + 1] = addr;
+
+ /* The range must not be descending. */
+ if (pm_pt->ranges[addrn * 2 + 1] < pm_pt->ranges[addrn * 2])
+ return (-1);
+ addrn += 1;
+ }
+ }
+
+ pm_pt->nranges = addrn;
+
+ pmc_config->pm_caps |= PMC_CAP_READ;
+
+ return (0);
+}
+#endif
+
static struct pmc_event_alias generic_aliases[] = {
EV_ALIAS("instructions", "SOFT-CLOCK.HARD"),
EV_ALIAS(NULL, NULL)
@@ -2780,7 +2881,8 @@
retval = -1;
if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
- mode != PMC_MODE_SC && mode != PMC_MODE_TC) {
+ mode != PMC_MODE_SC && mode != PMC_MODE_TC &&
+ mode != PMC_MODE_ST && mode != PMC_MODE_TT) {
errno = EINVAL;
goto out;
}
@@ -2903,6 +3005,7 @@
int
pmc_cpuinfo(const struct pmc_cpuinfo **pci)
{
+
if (pmc_syscall == -1) {
errno = ENXIO;
return (-1);
@@ -3023,6 +3126,10 @@
ev = skylake_xeon_event_table;
count = PMC_EVENT_TABLE_SIZE(skylake_xeon);
break;
+ case PMC_CPU_INTEL_KABYLAKE:
+ ev = kabylake_event_table;
+ count = PMC_EVENT_TABLE_SIZE(kabylake);
+ break;
case PMC_CPU_INTEL_IVYBRIDGE:
ev = ivybridge_event_table;
count = PMC_EVENT_TABLE_SIZE(ivybridge);
@@ -3086,6 +3193,10 @@
ev = tsc_event_table;
count = PMC_EVENT_TABLE_SIZE(tsc);
break;
+ case PMC_CLASS_PT:
+ ev = pt_event_table;
+ count = PMC_EVENT_TABLE_SIZE(pt);
+ break;
case PMC_CLASS_K7:
ev = k7_event_table;
count = PMC_EVENT_TABLE_SIZE(k7);
@@ -3184,12 +3295,14 @@
int
pmc_flush_logfile(void)
{
+
return (PMC_CALL(FLUSHLOG,0));
}
int
pmc_close_logfile(void)
{
+
return (PMC_CALL(CLOSELOG,0));
}
@@ -3406,6 +3519,12 @@
case PMC_CPU_INTEL_SKYLAKE_XEON:
PMC_MDEP_INIT_INTEL_V2(skylake_xeon);
break;
+ case PMC_CPU_INTEL_KABYLAKE:
+#if defined(__amd64__)
+ pmc_class_table[n++] = &pt_class_table_descr;
+#endif
+ PMC_MDEP_INIT_INTEL_V2(kabylake);
+ break;
case PMC_CPU_INTEL_IVYBRIDGE:
PMC_MDEP_INIT_INTEL_V2(ivybridge);
break;
@@ -3623,6 +3742,11 @@
evfence = skylake_xeon_event_table +
PMC_EVENT_TABLE_SIZE(skylake_xeon);
break;
+ case PMC_CPU_INTEL_KABYLAKE:
+ ev = kabylake_event_table;
+ evfence = kabylake_event_table +
+ PMC_EVENT_TABLE_SIZE(kabylake);
+ break;
case PMC_CPU_INTEL_IVYBRIDGE:
ev = ivybridge_event_table;
evfence = ivybridge_event_table + PMC_EVENT_TABLE_SIZE(ivybridge);
@@ -3736,6 +3860,9 @@
} else if (pe == PMC_EV_TSC_TSC) {
ev = tsc_event_table;
evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc);
+ } else if (pe == PMC_EV_PT_PT) {
+ ev = pt_event_table;
+ evfence = pt_event_table + PMC_EVENT_TABLE_SIZE(pt);
} else if ((int)pe >= PMC_EV_SOFT_FIRST && (int)pe <= PMC_EV_SOFT_LAST) {
ev = soft_event_table;
evfence = soft_event_table + soft_event_info.pm_nevent;
@@ -3853,6 +3980,68 @@
}
int
+pmc_proc_unsuspend(pmc_id_t pmc, pid_t pid)
+{
+ struct pmc_op_proc_unsuspend u;
+
+ u.pm_pmcid = pmc;
+ u.pm_pid = pid;
+
+ return (PMC_CALL(THREAD_UNSUSPEND, &u));
+}
+
+int
+pmc_read_trace(uint32_t cpu, pmc_id_t pmc,
+ pmc_value_t *cycle, pmc_value_t *offset)
+{
+ struct pmc_op_trace_read pmc_trace_read;
+
+ pmc_trace_read.pm_pmcid = pmc;
+ pmc_trace_read.pm_cpu = cpu;
+ pmc_trace_read.pm_cycle = 0;
+ pmc_trace_read.pm_offset = 0;
+
+ if (PMC_CALL(TRACE_READ, &pmc_trace_read) < 0)
+ return (-1);
+
+ *cycle = pmc_trace_read.pm_cycle;
+ *offset = pmc_trace_read.pm_offset;
+
+ return (0);
+}
+
+int
+pmc_trace_config(uint32_t cpu, pmc_id_t pmc,
+ uint64_t *ranges, uint32_t nranges)
+{
+ struct pmc_op_trace_config trc;
+
+ trc.pm_pmcid = pmc;
+ trc.pm_cpu = cpu;
+ trc.nranges = nranges;
+
+ if (nranges > PMC_FILTER_MAX_IP_RANGES)
+ return (-1);
+
+ memcpy(&trc.ranges, ranges, sizeof(uint64_t) * 2 * nranges);
+
+ if (PMC_CALL(TRACE_CONFIG, &trc) < 0)
+ return (-1);
+
+ return (0);
+}
+
+int
+pmc_log_kmap(pmc_id_t pmc)
+{
+ struct pmc_op_simple pmc_log_km;
+
+ pmc_log_km.pm_pmcid = pmc;
+
+ return (PMC_CALL(LOG_KERNEL_MAP, &pmc_log_km));
+}
+
+int
pmc_release(pmc_id_t pmc)
{
struct pmc_op_simple pmc_release_args;
Index: lib/libpmc/pmc.h
===================================================================
--- lib/libpmc/pmc.h
+++ lib/libpmc/pmc.h
@@ -77,6 +77,7 @@
int pmc_allocate(const char *_ctrspec, enum pmc_mode _mode, uint32_t _flags,
int _cpu, pmc_id_t *_pmcid);
int pmc_attach(pmc_id_t _pmcid, pid_t _pid);
+int pmc_proc_unsuspend(pmc_id_t pmc, pid_t pid);
int pmc_capabilities(pmc_id_t _pmc, uint32_t *_caps);
int pmc_configure_logfile(int _fd);
int pmc_flush_logfile(void);
@@ -88,7 +89,10 @@
int pmc_get_msr(pmc_id_t _pmc, uint32_t *_msr);
int pmc_init(void);
int pmc_read(pmc_id_t _pmc, pmc_value_t *_value);
+int pmc_read_trace(uint32_t cpu, pmc_id_t pmc, pmc_value_t *cycle, pmc_value_t *offset);
+int pmc_trace_config(uint32_t cpu, pmc_id_t pmc, uint64_t *ranges, uint32_t nranges);
int pmc_release(pmc_id_t _pmc);
+int pmc_log_kmap(pmc_id_t pmc);
int pmc_rw(pmc_id_t _pmc, pmc_value_t _newvalue, pmc_value_t *_oldvalue);
int pmc_set(pmc_id_t _pmc, pmc_value_t _value);
int pmc_start(pmc_id_t _pmc);
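
For orientation, a minimal userland sketch of how the new interfaces compose — assuming the "pt" event name generated by __PMC_EV_PT() and the keyword spelling accepted by pt_allocate_pmc() in libpmc.c above; the filter addresses are hypothetical and error handling is abbreviated (this is illustrative, not part of the diff):

#include <pmc.h>

int
trace_cpu0(void)
{
	pmc_value_t cycle, offset;
	pmc_id_t pmcid;

	if (pmc_init() < 0)
		return (-1);
	/* Branch + TSC packets, one IP filter range (hypothetical addresses). */
	if (pmc_allocate("pt,branches,tsc,addra=0x400000,addrb=0x401000",
	    PMC_MODE_ST, 0, 0, &pmcid) < 0)
		return (-1);
	if (pmc_start(pmcid) < 0)
		return (-1);
	/* ... workload runs; packets accumulate in the per-CPU buffer ... */
	if (pmc_read_trace(0, pmcid, &cycle, &offset) < 0)
		return (-1);
	/* 'cycle' counts buffer wraps, 'offset' is the current write position. */
	pmc_stop(pmcid);
	pmc_release(pmcid);
	return (0);
}
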
Index: sys/amd64/include/pmc_mdep.h
===================================================================
--- sys/amd64/include/pmc_mdep.h
+++ sys/amd64/include/pmc_mdep.h
@@ -45,6 +45,7 @@
#include <dev/hwpmc/hwpmc_core.h>
#include <dev/hwpmc/hwpmc_piv.h>
#include <dev/hwpmc/hwpmc_tsc.h>
+#include <dev/hwpmc/hwpmc_pt.h>
#include <dev/hwpmc/hwpmc_uncore.h>
/*
@@ -57,6 +58,7 @@
#define PMC_MDEP_CLASS_INDEX_P4 2
#define PMC_MDEP_CLASS_INDEX_IAP 2
#define PMC_MDEP_CLASS_INDEX_IAF 3
+#define PMC_MDEP_CLASS_INDEX_PT 4
#define PMC_MDEP_CLASS_INDEX_UCP 4
#define PMC_MDEP_CLASS_INDEX_UCF 5
@@ -70,6 +72,7 @@
* IAF Intel fixed-function PMCs in Core2 and later CPUs.
* UCP Intel Uncore programmable PMCs.
* UCF Intel Uncore fixed-function PMCs.
+ * PT Intel Processor Trace (PT).
*/
union pmc_md_op_pmcallocate {
@@ -79,7 +82,8 @@
struct pmc_md_ucf_op_pmcallocate pm_ucf;
struct pmc_md_ucp_op_pmcallocate pm_ucp;
struct pmc_md_p4_op_pmcallocate pm_p4;
- uint64_t __pad[4];
+ struct pmc_md_pt_op_pmcallocate pm_pt;
+ uint64_t __pad[1];
};
/* Logging */
@@ -95,6 +99,7 @@
struct pmc_md_ucf_pmc pm_ucf;
struct pmc_md_ucp_pmc pm_ucp;
struct pmc_md_p4_pmc pm_p4;
+ struct pmc_md_pt_pmc pm_pt;
};
#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_rip)
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -1802,6 +1802,7 @@
dev/hptiop/hptiop.c optional hptiop scbus
dev/hwpmc/hwpmc_logging.c optional hwpmc
dev/hwpmc/hwpmc_mod.c optional hwpmc
+dev/hwpmc/hwpmc_vm.c optional hwpmc
dev/hwpmc/hwpmc_soft.c optional hwpmc
dev/ichiic/ig4_acpi.c optional ig4 acpi iicbus
dev/ichiic/ig4_iic.c optional ig4 iicbus
Index: sys/conf/files.amd64
===================================================================
--- sys/conf/files.amd64
+++ sys/conf/files.amd64
@@ -312,6 +312,7 @@
dev/hwpmc/hwpmc_core.c optional hwpmc
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
+dev/hwpmc/hwpmc_pt.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc
dev/hyperv/input/hv_kbd.c optional hyperv
Index: sys/dev/hwpmc/hwpmc_core.c
===================================================================
--- sys/dev/hwpmc/hwpmc_core.c
+++ sys/dev/hwpmc/hwpmc_core.c
@@ -2287,6 +2287,7 @@
break;
case PMC_CPU_INTEL_SKYLAKE:
case PMC_CPU_INTEL_SKYLAKE_XEON:
+ case PMC_CPU_INTEL_KABYLAKE:
case PMC_CPU_INTEL_BROADWELL:
case PMC_CPU_INTEL_BROADWELL_XEON:
case PMC_CPU_INTEL_SANDYBRIDGE:
@@ -2325,6 +2326,7 @@
cpuflag = IAP_F_SLX;
break;
case PMC_CPU_INTEL_SKYLAKE:
+ case PMC_CPU_INTEL_KABYLAKE:
cpuflag = IAP_F_SL;
break;
case PMC_CPU_INTEL_BROADWELL_XEON:
@@ -2846,6 +2848,12 @@
struct core_cpu *cc;
pmc_value_t v;
+ error = pmc_pt_intr(cpu, tf);
+ if (error) {
+ /* Found */
+ return (1);
+ }
+
PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
TRAPF_USERMODE(tf));
Index: sys/dev/hwpmc/hwpmc_intel.c
===================================================================
--- sys/dev/hwpmc/hwpmc_intel.c
+++ sys/dev/hwpmc/hwpmc_intel.c
@@ -181,12 +181,8 @@
cputype = PMC_CPU_INTEL_IVYBRIDGE_XEON;
nclasses = 3;
break;
- /* Skylake */
case 0x4e:
case 0x5e:
- /* Kabylake */
- case 0x8E: /* Per Intel document 325462-063US July 2017. */
- case 0x9E: /* Per Intel document 325462-063US July 2017. */
cputype = PMC_CPU_INTEL_SKYLAKE;
nclasses = 3;
break;
@@ -220,6 +216,11 @@
nclasses = 3;
break;
}
+ case 0x8E: /* Per Intel document 325462-063US July 2017. */
+ case 0x9E: /* Per Intel document 325462-063US July 2017. */
+ cputype = PMC_CPU_INTEL_KABYLAKE;
+ nclasses = 4;
+ break;
break;
#if defined(__i386__) || defined(__amd64__)
case 0xF00: /* P4 */
@@ -237,7 +238,7 @@
/* Allocate base class and initialize machine dependent struct */
pmc_mdep = pmc_mdep_alloc(nclasses);
- pmc_mdep->pmd_cputype = cputype;
+ pmc_mdep->pmd_cputype = cputype;
pmc_mdep->pmd_switch_in = intel_switch_in;
pmc_mdep->pmd_switch_out = intel_switch_out;
@@ -256,6 +257,7 @@
case PMC_CPU_INTEL_BROADWELL_XEON:
case PMC_CPU_INTEL_SKYLAKE_XEON:
case PMC_CPU_INTEL_SKYLAKE:
+ case PMC_CPU_INTEL_KABYLAKE:
case PMC_CPU_INTEL_CORE:
case PMC_CPU_INTEL_CORE2:
case PMC_CPU_INTEL_CORE2EXTREME:
@@ -312,10 +314,10 @@
goto error;
}
+#if defined(__i386__) || defined(__amd64__)
/*
* Init the uncore class.
*/
-#if defined(__i386__) || defined(__amd64__)
switch (cputype) {
/*
* Intel Corei7 and Westmere processors.
@@ -330,7 +332,19 @@
default:
break;
}
+
+ /*
+ * Intel Processor Tracing (PT).
+ */
+ if (cputype == PMC_CPU_INTEL_KABYLAKE) {
+ error = pmc_pt_initialize(pmc_mdep, ncpus);
+ if (error) {
+ pmc_pt_finalize(pmc_mdep);
+ goto error;
+ }
+ }
#endif
+
error:
if (error) {
pmc_mdep_free(pmc_mdep);
@@ -353,6 +367,7 @@
case PMC_CPU_INTEL_BROADWELL_XEON:
case PMC_CPU_INTEL_SKYLAKE_XEON:
case PMC_CPU_INTEL_SKYLAKE:
+ case PMC_CPU_INTEL_KABYLAKE:
case PMC_CPU_INTEL_CORE:
case PMC_CPU_INTEL_CORE2:
case PMC_CPU_INTEL_CORE2EXTREME:
@@ -389,10 +404,10 @@
KASSERT(0, ("[intel,%d] unknown CPU type", __LINE__));
}
+#if defined(__i386__) || defined(__amd64__)
/*
* Uncore.
*/
-#if defined(__i386__) || defined(__amd64__)
switch (md->pmd_cputype) {
case PMC_CPU_INTEL_BROADWELL:
case PMC_CPU_INTEL_COREI7:
@@ -404,5 +419,11 @@
default:
break;
}
+
+ /*
+ * Intel Processor Tracing (PT).
+ */
+ if (md->pmd_cputype == PMC_CPU_INTEL_KABYLAKE)
+ pmc_pt_finalize(md);
#endif
}
Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- sys/dev/hwpmc/hwpmc_mod.c
+++ sys/dev/hwpmc/hwpmc_mod.c
@@ -74,6 +74,7 @@
#include <vm/vm_object.h>
#include "hwpmc_soft.h"
+#include "hwpmc_vm.h"
/*
* Types
@@ -1295,6 +1296,8 @@
pp->pp_pmcs[ri].pp_pmcval;
pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount;
mtx_pool_unlock_spin(pmc_mtxpool, pm);
+ } else if (PMC_TO_MODE(pm) == PMC_MODE_TT) {
+ /* Nothing */
} else {
KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
("[pmc,%d] illegal mode=%d", __LINE__,
@@ -1310,7 +1313,8 @@
pcd->pcd_write_pmc(cpu, adjri, newvalue);
/* If a sampling mode PMC, reset stalled state. */
- if (PMC_TO_MODE(pm) == PMC_MODE_TS)
+ if (PMC_TO_MODE(pm) == PMC_MODE_TS ||
+ PMC_TO_MODE(pm) == PMC_MODE_TT)
CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
/* Indicate that we desire this to run. */
@@ -1472,7 +1476,8 @@
pp->pp_pmcs[ri].pp_pmcval,
pm->pm_sc.pm_reloadcount));
mtx_pool_unlock_spin(pmc_mtxpool, pm);
-
+ } else if (mode == PMC_MODE_TT) {
+ /* Nothing */
} else {
tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
@@ -1528,6 +1533,10 @@
const struct pmc *pm;
struct pmc_owner *po;
const struct pmc_process *pp;
+ struct proc *p;
+ bool pause_thread;
+
+ sx_slock(&pmc_sx);
freepath = fullpath = NULL;
pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
@@ -1539,17 +1548,42 @@
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
- if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
+ if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) {
+ sx_sunlock(&pmc_sx);
goto done;
+ }
+
+ p = td->td_proc;
+ if ((p->p_flag & P_HWPMC) == 0) {
+ sx_sunlock(&pmc_sx);
+ goto done;
+ }
+
+ pause_thread = 0;
/*
* Inform sampling PMC owners tracking this process.
*/
- for (ri = 0; ri < md->pmd_npmc; ri++)
- if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
- PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ for (ri = 0; ri < md->pmd_npmc; ri++) {
+ if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+ continue;
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+ PMC_TO_MODE(pm) == PMC_MODE_TT)
pmclog_process_map_in(pm->pm_owner,
pid, pkm->pm_address, fullpath);
+ if (PMC_TO_MODE(pm) == PMC_MODE_TT)
+ pause_thread = 1;
+ }
+
+ sx_sunlock(&pmc_sx);
+
+ if (pause_thread) {
+ PROC_LOCK(td->td_proc);
+ PROC_SLOCK(td->td_proc);
+ thread_suspend_switch(td, td->td_proc);
+ PROC_SUNLOCK(td->td_proc);
+ PROC_UNLOCK(td->td_proc);
+ }
done:
if (freepath)
@@ -1580,11 +1614,14 @@
if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
return;
- for (ri = 0; ri < md->pmd_npmc; ri++)
- if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
- PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ for (ri = 0; ri < md->pmd_npmc; ri++) {
+ if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+ continue;
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+ PMC_TO_MODE(pm) == PMC_MODE_TT)
pmclog_process_map_out(pm->pm_owner, pid,
pkm->pm_address, pkm->pm_address + pkm->pm_size);
+ }
}
/*
@@ -1598,7 +1635,8 @@
struct pmckern_map_in *km, *kmbase;
sx_assert(&pmc_sx, SX_LOCKED);
- KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
+ KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+ PMC_TO_MODE(pm) == PMC_MODE_ST,
("[pmc,%d] non-sampling PMC (%p) desires mapping information",
__LINE__, (void *) pm));
@@ -1999,7 +2037,6 @@
break;
case PMC_FN_MMAP:
- sx_assert(&pmc_sx, SX_LOCKED);
pmc_process_mmap(td, (struct pmckern_map_in *) arg);
break;
@@ -2115,8 +2152,8 @@
mtx_lock_spin(&pmc_processhash_mtx);
LIST_FOREACH(pp, pph, pp_next)
- if (pp->pp_proc == p)
- break;
+ if (pp->pp_proc == p)
+ break;
if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
LIST_REMOVE(pp, pp_next);
@@ -2652,7 +2689,8 @@
* If this is a sampling mode PMC, log mapping information for
* the kernel modules that are currently loaded.
*/
- if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) ||
+ PMC_TO_MODE(pm) == PMC_MODE_ST)
pmc_log_kernel_mappings(pm);
if (PMC_IS_VIRTUAL_MODE(mode)) {
@@ -3306,9 +3344,14 @@
mode = pa.pm_mode;
cpu = pa.pm_cpu;
- if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC &&
- mode != PMC_MODE_TS && mode != PMC_MODE_TC) ||
- (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
+ if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
+ mode != PMC_MODE_SC && mode != PMC_MODE_TC &&
+ mode != PMC_MODE_ST && mode != PMC_MODE_TT) {
+ error = EINVAL;
+ break;
+ }
+
+ if (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max()) {
error = EINVAL;
break;
}
@@ -3755,6 +3798,175 @@
}
break;
+ case PMC_OP_LOG_KERNEL_MAP:
+ {
+ struct pmc_op_simple sp;
+ struct pmc *pm;
+
+ if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
+ break;
+
+ /* locate pmc descriptor */
+ if ((error = pmc_find_pmc(sp.pm_pmcid, &pm)) != 0)
+ break;
+
+ if (PMC_TO_MODE(pm) != PMC_MODE_ST)
+ break;
+
+ if (pm->pm_state != PMC_STATE_ALLOCATED &&
+ pm->pm_state != PMC_STATE_STOPPED &&
+ pm->pm_state != PMC_STATE_RUNNING) {
+ error = EINVAL;
+ break;
+ }
+
+ pmc_log_kernel_mappings(pm);
+ }
+ break;
+
+ case PMC_OP_THREAD_UNSUSPEND:
+ {
+ struct pmc_op_proc_unsuspend u;
+ struct proc *p;
+ struct pmc *pm;
+
+ if ((error = copyin(arg, &u, sizeof(u))) != 0)
+ break;
+
+ /* locate pmc descriptor */
+ if ((error = pmc_find_pmc(u.pm_pmcid, &pm)) != 0)
+ break;
+
+ /* lookup pid */
+ if ((p = pfind(u.pm_pid)) == NULL) {
+ error = ESRCH;
+ break;
+ }
+
+ if ((p->p_flag & P_HWPMC) == 0) {
+ PROC_UNLOCK(p); /* pfind() returns the process locked */
+ break;
+ }
+
+ PROC_SLOCK(p);
+ thread_unsuspend(p);
+ PROC_SUNLOCK(p);
+ PROC_UNLOCK(p);
+ }
+ break;
+
+ case PMC_OP_TRACE_CONFIG:
+ {
+ struct pmc_op_trace_config trc;
+ uint64_t *ranges;
+ struct pmc *pm;
+ struct pmc_binding pb;
+ struct pmc_classdep *pcd;
+ uint32_t nranges;
+ uint32_t cpu;
+ uint32_t ri;
+ int adjri;
+
+ if ((error = copyin(arg, &trc, sizeof(trc))) != 0)
+ break;
+
+ /* locate pmc descriptor */
+ if ((error = pmc_find_pmc(trc.pm_pmcid, &pm)) != 0)
+ break;
+
+ if (PMC_TO_MODE(pm) != PMC_MODE_ST &&
+ PMC_TO_MODE(pm) != PMC_MODE_TT)
+ break;
+
+ /* Can't proceed with PMC that hasn't been started. */
+ if (pm->pm_state != PMC_STATE_ALLOCATED &&
+ pm->pm_state != PMC_STATE_STOPPED &&
+ pm->pm_state != PMC_STATE_RUNNING) {
+ error = EINVAL;
+ break;
+ }
+
+ cpu = trc.pm_cpu;
+
+ ri = PMC_TO_ROWINDEX(pm);
+ pcd = pmc_ri_to_classdep(md, ri, &adjri);
+ if (pcd->pcd_trace_config == NULL)
+ break;
+
+ /* switch to CPU 'cpu' */
+ pmc_save_cpu_binding(&pb);
+ pmc_select_cpu(cpu);
+
+ ranges = trc.ranges;
+ nranges = trc.nranges;
+
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ error = (*pcd->pcd_trace_config)(cpu, adjri,
+ pm, ranges, nranges);
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+
+ pmc_restore_cpu_binding(&pb);
+ }
+ break;
+
+ /*
+ * Read a PMC trace buffer ptr.
+ */
+ case PMC_OP_TRACE_READ:
+ {
+ struct pmc_op_trace_read trr;
+ struct pmc_op_trace_read *trr_ret;
+ struct pmc_binding pb;
+ struct pmc_classdep *pcd;
+ struct pmc *pm;
+ pmc_value_t cycle;
+ pmc_value_t offset;
+ uint32_t cpu;
+ uint32_t ri;
+ int adjri;
+
+ if ((error = copyin(arg, &trr, sizeof(trr))) != 0)
+ break;
+
+ /* locate pmc descriptor */
+ if ((error = pmc_find_pmc(trr.pm_pmcid, &pm)) != 0)
+ break;
+
+ if (PMC_TO_MODE(pm) != PMC_MODE_ST &&
+ PMC_TO_MODE(pm) != PMC_MODE_TT)
+ break;
+
+ /* Can't read a PMC that hasn't been started. */
+ if (pm->pm_state != PMC_STATE_ALLOCATED &&
+ pm->pm_state != PMC_STATE_STOPPED &&
+ pm->pm_state != PMC_STATE_RUNNING) {
+ error = EINVAL;
+ break;
+ }
+
+ cpu = trr.pm_cpu;
+
+ ri = PMC_TO_ROWINDEX(pm);
+ pcd = pmc_ri_to_classdep(md, ri, &adjri);
+
+ /* switch to CPU 'cpu' */
+ pmc_save_cpu_binding(&pb);
+ pmc_select_cpu(cpu);
+
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ error = (*pcd->pcd_read_trace)(cpu, adjri,
+ pm, &cycle, &offset);
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+
+ pmc_restore_cpu_binding(&pb);
+
+ trr_ret = (struct pmc_op_trace_read *)arg;
+ if ((error = copyout(&cycle, &trr_ret->pm_cycle,
+ sizeof(trr.pm_cycle))))
+ break;
+ if ((error = copyout(&offset, &trr_ret->pm_offset,
+ sizeof(trr.pm_offset))))
+ break;
+ }
+ break;
/*
* Read and/or write a PMC.
@@ -3858,7 +4070,7 @@
/* save old value */
if (prw.pm_flags & PMC_F_OLDVALUE)
if ((error = (*pcd->pcd_read_pmc)(cpu, adjri,
- &oldvalue)))
+ &oldvalue)))
goto error;
/* write out new value */
if (prw.pm_flags & PMC_F_NEWVALUE)
@@ -5029,6 +5241,8 @@
printf("\n");
}
+ pmc_vm_initialize(md);
+
return (error);
}
@@ -5181,6 +5395,7 @@
}
pmclog_shutdown();
+ pmc_vm_finalize();
sx_xunlock(&pmc_sx); /* we are done */
}
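
The TRACE_CONFIG handler above can be driven through its libpmc wrapper; a hedged sketch follows (the IP range is hypothetical, and at most PMC_FILTER_MAX_IP_RANGES start/end pairs may be passed):

#include <err.h>
#include <stdint.h>
#include <pmc.h>

static void
set_ip_filter(pmc_id_t pmcid)
{
	/* One start/end pair; the addresses are made up for illustration. */
	uint64_t ranges[2] = { 0x400000, 0x401000 };

	if (pmc_trace_config(0 /* cpu */, pmcid, ranges, 1) < 0)
		err(1, "pmc_trace_config");
}
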
Index: sys/dev/hwpmc/hwpmc_pt.h
===================================================================
--- sys/dev/hwpmc/hwpmc_pt.h
+++ sys/dev/hwpmc/hwpmc_pt.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _DEV_HWPMC_PT_H_
+#define _DEV_HWPMC_PT_H_
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <vm/vm.h>
+
+#include <machine/frame.h>
+
+#define PT_CPUID 0x14
+#define PT_NADDR 4
+#define PT_NPMCS 1
+
+struct pmc_md_pt_op_pmcallocate {
+ uint32_t flags;
+#define INTEL_PT_FLAG_BRANCHES (1 << 0)
+#define INTEL_PT_FLAG_TSC (1 << 1)
+#define INTEL_PT_FLAG_MTC (1 << 2)
+#define INTEL_PT_FLAG_DISRETC (1 << 3)
+ uint64_t ranges[2 * PT_NADDR];
+ int nranges;
+};
+
+#ifdef _KERNEL
+struct xsave_header {
+ uint64_t xsave_bv;
+ uint64_t xcomp_bv;
+ uint8_t reserved[48];
+};
+
+struct pt_ext_area {
+ uint64_t rtit_ctl;
+ uint64_t rtit_output_base;
+ uint64_t rtit_output_mask_ptrs;
+ uint64_t rtit_status;
+ uint64_t rtit_cr3_match;
+ uint64_t rtit_addr0_a;
+ uint64_t rtit_addr0_b;
+ uint64_t rtit_addr1_a;
+ uint64_t rtit_addr1_b;
+};
+
+struct pt_save_area {
+ uint8_t legacy_state[512];
+ struct xsave_header header;
+ struct pt_ext_area pt_ext_area;
+} __aligned(64);
+
+struct topa_entry {
+ uint64_t base;
+ uint64_t size;
+ uint64_t offset;
+};
+
+struct pt_buffer {
+ uint64_t *topa_hw;
+ struct topa_entry *topa_sw;
+ uint64_t cycle;
+ vm_object_t obj;
+};
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_pt_pmc {
+ struct pt_buffer pt_buffers[MAXCPU];
+};
+
+/*
+ * Prototypes.
+ */
+
+int pmc_pt_initialize(struct pmc_mdep *_md, int _maxcpu);
+void pmc_pt_finalize(struct pmc_mdep *_md);
+int pmc_pt_intr(int cpu, struct trapframe *tf);
+
+#endif /* _KERNEL */
+#endif /* !_DEV_HWPMC_PT_H_ */
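
The save area above must match the XSAVES compacted layout that xsaves()/xrstors() in hwpmc_pt.c operate on: a 512-byte legacy region, a 64-byte header, then the PT state component. A compile-time sketch of that invariant (not part of the diff; CTASSERT is the in-tree idiom):

#include <sys/param.h>
#include <sys/systm.h>

/* Guard the XSAVES-compacted offsets assumed by the save/restore path. */
CTASSERT(sizeof(struct xsave_header) == 64);
CTASSERT(__offsetof(struct pt_save_area, header) == 512);
CTASSERT(__offsetof(struct pt_save_area, pt_ext_area) == 576);
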
Index: sys/dev/hwpmc/hwpmc_pt.c
===================================================================
--- sys/dev/hwpmc/hwpmc_pt.c
+++ sys/dev/hwpmc/hwpmc_pt.c
@@ -0,0 +1,952 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+#include <sys/pmckern.h>
+#include <sys/systm.h>
+#include <sys/ioccom.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/vmem.h>
+#include <sys/vmmeter.h>
+#include <sys/bus.h>
+#include <sys/kthread.h>
+#include <sys/pmclog.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/pmap.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/specialreg.h>
+
+#include <dev/hwpmc/hwpmc_vm.h>
+
+#include <x86/apicvar.h>
+#include <x86/x86_var.h>
+
+static MALLOC_DEFINE(M_PT, "pt", "PT driver");
+static uint64_t pt_xsave_mask;
+
+extern struct cdev *pmc_cdev[MAXCPU];
+
+/*
+ * Intel PT support.
+ */
+
+#define PT_CAPS (PMC_CAP_READ | PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | PMC_CAP_USER)
+
+#define PMC_PT_DEBUG
+#undef PMC_PT_DEBUG
+
+#ifdef PMC_PT_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+struct pt_descr {
+ struct pmc_descr pm_descr; /* "base class" */
+};
+
+static struct pt_descr pt_pmcdesc[PT_NPMCS] =
+{
+ {
+ .pm_descr =
+ {
+ .pd_name = "PT",
+ .pd_class = PMC_CLASS_PT,
+ .pd_caps = PT_CAPS,
+ .pd_width = 64
+ }
+ }
+};
+
+/*
+ * Per-CPU data structure for PTs.
+ */
+
+struct pt_cpu {
+ struct pmc_hw tc_hw;
+ uint32_t l0_eax;
+ uint32_t l0_ebx;
+ uint32_t l0_ecx;
+ uint32_t l1_eax;
+ uint32_t l1_ebx;
+ struct pmc *pm_mmap;
+ uint32_t flags;
+#define FLAG_PT_ALLOCATED (1 << 0)
+ struct pt_save_area save_area;
+};
+
+static struct pt_cpu **pt_pcpu;
+
+static __inline void
+xrstors(char *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ __asm __volatile("xrstors %0" : : "m" (*addr), "a" (low), "d" (hi));
+}
+
+static __inline void
+xsaves(char *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ __asm __volatile("xsaves %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+ "memory");
+}
+
+static void
+pt_save_restore(struct pt_cpu *pt_pc, bool save)
+{
+ uint64_t val;
+
+ clts();
+ val = rxcr(XCR0);
+ load_xcr(XCR0, pt_xsave_mask);
+ wrmsr(MSR_IA32_XSS, XFEATURE_ENABLED_PT);
+ if (save) {
+ KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) != 0,
+ ("%s: PT is disabled", __func__));
+ xsaves((char *)&pt_pc->save_area, XFEATURE_ENABLED_PT);
+ } else {
+ KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) == 0,
+ ("%s: PT is enabled", __func__));
+ xrstors((char *)&pt_pc->save_area, XFEATURE_ENABLED_PT);
+ }
+ load_xcr(XCR0, val);
+ load_cr0(rcr0() | CR0_TS);
+}
+
+static void
+pt_configure_ranges(struct pt_cpu *pt_pc, const uint64_t *ranges,
+ uint32_t nranges)
+{
+ struct pt_ext_area *pt_ext;
+ struct pt_save_area *save_area;
+ int nranges_supp;
+ int n;
+
+ save_area = &pt_pc->save_area;
+ pt_ext = &save_area->pt_ext_area;
+
+ if (pt_pc->l0_ebx & CPUPT_IPF) {
+ /* How many address ranges does the CPU support? */
+ nranges_supp = (pt_pc->l1_eax & CPUPT_NADDR_M) >> CPUPT_NADDR_S;
+
+ /* XSAVES/XRSTORS handle only two ranges. */
+ if (nranges_supp > 2)
+ nranges_supp = 2;
+
+ n = nranges > nranges_supp ? nranges_supp : nranges;
+
+ switch (n) {
+ case 2:
+ pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(1));
+ pt_ext->rtit_addr1_a = ranges[2];
+ pt_ext->rtit_addr1_b = ranges[3];
+ /* FALLTHROUGH */
+ case 1:
+ pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(0));
+ pt_ext->rtit_addr0_a = ranges[0];
+ pt_ext->rtit_addr0_b = ranges[1];
+ /* FALLTHROUGH */
+ default:
+ break;
+ }
+ }
+}
+
+static int
+pt_buffer_allocate(uint32_t cpu, struct pt_buffer *pt_buf)
+{
+ struct pmc_vm_map *map;
+ struct pt_cpu *pt_pc;
+ uint64_t topa_size;
+ uint64_t segsize;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t bufsize;
+ struct cdev_cpu *cc;
+ vm_object_t obj;
+ vm_page_t m;
+ int npages;
+ int ntopa;
+ int req;
+ int i, j;
+
+ pt_pc = pt_pcpu[cpu];
+
+ bufsize = 128 * 1024 * 1024;
+
+ if (pt_pc->l0_ecx & CPUPT_TOPA_MULTI)
+ topa_size = TOPA_SIZE_4K;
+ else
+ topa_size = TOPA_SIZE_128M;
+
+ segsize = PAGE_SIZE << (topa_size >> TOPA_SIZE_S);
+ ntopa = bufsize / segsize;
+ npages = segsize / PAGE_SIZE;
+
+ pt_buf->obj = obj = vm_pager_allocate(OBJT_PHYS, 0, bufsize,
+ PROT_READ, 0, curthread->td_ucred);
+
+ size = roundup2((ntopa + 1) * 8, PAGE_SIZE);
+ pt_buf->topa_hw = malloc(size, M_PT, M_WAITOK | M_ZERO);
+ pt_buf->topa_sw = malloc(ntopa * sizeof(struct topa_entry), M_PT,
+ M_WAITOK | M_ZERO);
+
+ VM_OBJECT_WLOCK(obj);
+ vm_object_reference_locked(obj);
+ offset = 0;
+ for (i = 0; i < ntopa; i++) {
+ req = VM_ALLOC_NOBUSY | VM_ALLOC_ZERO;
+ if (npages == 1)
+ m = vm_page_alloc(obj, i, req);
+ else
+ m = vm_page_alloc_contig(obj, i, req, npages, 0, ~0,
+ bufsize, 0, VM_MEMATTR_DEFAULT);
+ if (m == NULL) {
+ VM_OBJECT_WUNLOCK(obj);
+ printf("%s: Can't allocate memory.\n", __func__);
+ goto error;
+ }
+ for (j = 0; j < npages; j++)
+ m[j].valid = VM_PAGE_BITS_ALL;
+ pt_buf->topa_sw[i].size = segsize;
+ pt_buf->topa_sw[i].offset = offset;
+ pt_buf->topa_hw[i] = VM_PAGE_TO_PHYS(m) | topa_size;
+ if (i == (ntopa - 1))
+ pt_buf->topa_hw[i] |= TOPA_INT;
+
+ offset += segsize;
+ }
+ VM_OBJECT_WUNLOCK(obj);
+
+ /* The last entry is a pointer to the base table. */
+ pt_buf->topa_hw[ntopa] = vtophys(pt_buf->topa_hw) | TOPA_END;
+ pt_buf->cycle = 0;
+
+ map = malloc(sizeof(struct pmc_vm_map), M_PT, M_WAITOK | M_ZERO);
+ map->t = curthread;
+ map->obj = obj;
+ map->buf = pt_buf;
+
+ cc = pmc_cdev[cpu]->si_drv1;
+
+ mtx_lock(&cc->vm_mtx);
+ TAILQ_INSERT_HEAD(&cc->pmc_maplist, map, map_next);
+ mtx_unlock(&cc->vm_mtx);
+
+ return (0);
+
+error:
+ free(pt_buf->topa_hw, M_PT);
+ free(pt_buf->topa_sw, M_PT);
+ vm_object_deallocate(obj);
+
+ return (-1);
+}
+
+static int
+pt_buffer_deallocate(uint32_t cpu, struct pt_buffer *pt_buf)
+{
+ struct pmc_vm_map *map, *map_tmp;
+ struct cdev_cpu *cc;
+
+ cc = pmc_cdev[cpu]->si_drv1;
+
+ mtx_lock(&cc->vm_mtx);
+ TAILQ_FOREACH_SAFE(map, &cc->pmc_maplist, map_next, map_tmp) {
+ if (map->buf == pt_buf) {
+ TAILQ_REMOVE(&cc->pmc_maplist, map, map_next);
+ free(map, M_PT);
+ break;
+ }
+ }
+ mtx_unlock(&cc->vm_mtx);
+
+ free(pt_buf->topa_hw, M_PT);
+ free(pt_buf->topa_sw, M_PT);
+ vm_object_deallocate(pt_buf->obj);
+
+ return (0);
+}
+
+static int
+pt_buffer_prepare(uint32_t cpu, struct pmc *pm,
+ const struct pmc_op_pmcallocate *a)
+{
+ const struct pmc_md_pt_op_pmcallocate *pm_pta;
+ struct pt_cpu *pt_pc;
+ struct pmc_md_pt_pmc *pm_pt;
+ struct pt_buffer *pt_buf;
+ struct xsave_header *hdr;
+ struct pt_ext_area *pt_ext;
+ struct pt_save_area *save_area;
+ enum pmc_mode mode;
+ int error;
+
+ pt_pc = pt_pcpu[cpu];
+ if ((pt_pc->l0_ecx & CPUPT_TOPA) == 0)
+ return (ENXIO); /* We rely on TOPA support */
+
+ pm_pta = (const struct pmc_md_pt_op_pmcallocate *)&a->pm_md.pm_pt;
+ pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+ pt_buf = &pm_pt->pt_buffers[cpu];
+
+ error = pt_buffer_allocate(cpu, pt_buf);
+ if (error != 0) {
+ dprintf("%s: can't allocate buffers\n", __func__);
+ return (EINVAL);
+ }
+
+ save_area = &pt_pc->save_area;
+ bzero(save_area, sizeof(struct pt_save_area));
+
+ hdr = &save_area->header;
+ hdr->xsave_bv = XFEATURE_ENABLED_PT;
+ hdr->xcomp_bv = XFEATURE_ENABLED_PT | (1ULL << 63) /* compaction */;
+
+ pt_ext = &save_area->pt_ext_area;
+
+ pt_ext->rtit_ctl = RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+ pt_ext->rtit_output_base = (uint64_t)vtophys(pt_buf->topa_hw);
+ pt_ext->rtit_output_mask_ptrs = 0x7f;
+
+ pt_configure_ranges(pt_pc, pm_pta->ranges, pm_pta->nranges);
+
+ /*
+ * TODO
+ * if (sc->l0_ebx & CPUPT_PRW) {
+ * reg |= RTIT_CTL_FUPONPTW;
+ * reg |= RTIT_CTL_PTWEN;
+ * }
+ */
+
+ mode = PMC_TO_MODE(pm);
+ if (mode == PMC_MODE_ST)
+ pt_ext->rtit_ctl |= RTIT_CTL_OS;
+ else if (mode == PMC_MODE_TT)
+ pt_ext->rtit_ctl |= RTIT_CTL_USER;
+ else {
+ dprintf("%s: unsupported mode %d\n", __func__, mode);
+ return (-1);
+ }
+
+ /* Enable FUP, TIP, TIP.PGE, TIP.PGD, TNT, MODE.Exec and MODE.TSX packets */
+ if (pm_pta->flags & INTEL_PT_FLAG_BRANCHES)
+ pt_ext->rtit_ctl |= RTIT_CTL_BRANCHEN;
+
+ if (pm_pta->flags & INTEL_PT_FLAG_TSC)
+ pt_ext->rtit_ctl |= RTIT_CTL_TSCEN;
+
+ if ((pt_pc->l0_ebx & CPUPT_MTC) &&
+ (pm_pta->flags & INTEL_PT_FLAG_MTC))
+ pt_ext->rtit_ctl |= RTIT_CTL_MTCEN;
+
+ if (pm_pta->flags & INTEL_PT_FLAG_DISRETC)
+ pt_ext->rtit_ctl |= RTIT_CTL_DISRETC;
+
+ /*
+ * TODO: specify MTC frequency
+ * Note: Check Bitmap of supported MTC Period Encodings
+ * pt_ext->rtit_ctl |= RTIT_CTL_MTC_FREQ(6);
+ */
+
+ return (0);
+}
+
+static int
+pt_allocate_pmc(int cpu, int ri, struct pmc *pm,
+ const struct pmc_op_pmcallocate *a)
+{
+ struct pt_cpu *pt_pc;
+ int i;
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_PROCTRACE) == 0)
+ return (ENXIO);
+
+ pt_pc = pt_pcpu[cpu];
+
+ dprintf("%s: curthread %lx, cpu %d (curcpu %d)\n", __func__,
+ (uint64_t)curthread, cpu, PCPU_GET(cpuid));
+ dprintf("%s: cpu %d (curcpu %d)\n", __func__,
+ cpu, PCPU_GET(cpuid));
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri >= 0 && ri < PT_NPMCS,
+ ("[pt,%d] illegal row index %d", __LINE__, ri));
+
+ if (a->pm_class != PMC_CLASS_PT)
+ return (EINVAL);
+
+ if (a->pm_ev != PMC_EV_PT_PT)
+ return (EINVAL);
+
+ if ((pm->pm_caps & PT_CAPS) == 0)
+ return (EINVAL);
+
+ if ((pm->pm_caps & ~PT_CAPS) != 0)
+ return (EPERM);
+
+ if (a->pm_mode != PMC_MODE_ST &&
+ a->pm_mode != PMC_MODE_TT)
+ return (EINVAL);
+
+ /* Can't allocate multiple ST */
+ if (a->pm_mode == PMC_MODE_ST &&
+ pt_pc->flags & FLAG_PT_ALLOCATED) {
+ dprintf("error: pt is already allocated for CPU %d\n", cpu);
+ return (EUSERS);
+ }
+
+ if (a->pm_mode == PMC_MODE_TT) {
+ for (i = 0; i < pmc_cpu_max(); i++) {
+ if (pt_buffer_prepare(i, pm, a))
+ return (EINVAL);
+ }
+ } else {
+ if (pt_buffer_prepare(cpu, pm, a))
+ return (EINVAL);
+ }
+
+ if (a->pm_mode == PMC_MODE_ST)
+ pt_pc->flags |= FLAG_PT_ALLOCATED;
+
+ return (0);
+}
+
+int
+pmc_pt_intr(int cpu, struct trapframe *tf)
+{
+ struct pmc_md_pt_pmc *pm_pt;
+ struct pt_buffer *pt_buf;
+ struct pt_cpu *pt_pc;
+ struct pmc_hw *phw;
+ struct pmc *pm;
+
+ if (pt_pcpu == NULL)
+ return (0);
+
+ pt_pc = pt_pcpu[cpu];
+ if (pt_pc == NULL)
+ return (0);
+
+ phw = &pt_pc->tc_hw;
+ pm = phw->phw_pmc;
+ if (pm == NULL)
+ return (0);
+
+ pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+ pt_buf = &pm_pt->pt_buffers[cpu];
+
+ atomic_add_long(&pt_buf->cycle, 1);
+
+ lapic_reenable_pmc();
+
+ return (1);
+}
+
+static int
+pt_config_pmc(int cpu, int ri, struct pmc *pm)
+{
+ struct pt_cpu *pt_pc;
+ struct pmc_hw *phw;
+
+ dprintf("%s: cpu %d (pm %lx)\n", __func__, cpu, (uint64_t)pm);
+
+ PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ pt_pc = pt_pcpu[cpu];
+ phw = &pt_pc->tc_hw;
+
+ KASSERT(pm == NULL || phw->phw_pmc == NULL,
+ ("[pt,%d] pm=%p phw->pm=%p hwpmc not unconfigured", __LINE__,
+ pm, phw->phw_pmc));
+
+ phw->phw_pmc = pm;
+
+ return (0);
+}
+
+static int
+pt_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
+{
+ const struct pt_descr *pd;
+ struct pmc_hw *phw;
+ size_t copied;
+ int error;
+
+ dprintf("%s\n", __func__);
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ phw = &pt_pcpu[cpu]->tc_hw;
+ pd = &pt_pmcdesc[ri];
+
+ if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
+ PMC_NAME_MAX, &copied)) != 0)
+ return (error);
+
+ pi->pm_class = pd->pm_descr.pd_class;
+
+ if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
+ pi->pm_enabled = TRUE;
+ *ppmc = phw->phw_pmc;
+ } else {
+ pi->pm_enabled = FALSE;
+ *ppmc = NULL;
+ }
+
+ return (0);
+}
+
+static int
+pt_get_config(int cpu, int ri, struct pmc **ppm)
+{
+ struct pmc_hw *phw;
+ struct pt_cpu *pt_pc;
+
+ dprintf("%s\n", __func__);
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ pt_pc = pt_pcpu[cpu];
+ phw = &pt_pc->tc_hw;
+
+ *ppm = phw->phw_pmc;
+
+ return (0);
+}
+
+static void
+pt_enumerate(struct pt_cpu *pt_pc)
+{
+ u_int cp[4];
+ u_int *eax;
+ u_int *ebx;
+ u_int *ecx;
+
+ eax = &cp[0];
+ ebx = &cp[1];
+ ecx = &cp[2];
+
+ dprintf("Enumerating part 1\n");
+
+ cpuid_count(PT_CPUID, 0, cp);
+ dprintf("%s: Maximum valid sub-leaf Index: %x\n", __func__, cp[0]);
+ dprintf("%s: ebx %x\n", __func__, cp[1]);
+ dprintf("%s: ecx %x\n", __func__, cp[2]);
+
+ pt_pc->l0_eax = cp[0];
+ pt_pc->l0_ebx = cp[1];
+ pt_pc->l0_ecx = cp[2];
+
+ dprintf("Enumerating part 2\n");
+
+ cpuid_count(PT_CPUID, 1, cp);
+ dprintf("%s: eax %x\n", __func__, cp[0]);
+ dprintf("%s: ebx %x\n", __func__, cp[1]);
+
+ pt_pc->l1_eax = cp[0];
+ pt_pc->l1_ebx = cp[1];
+}
+
+static int
+pt_pcpu_init(struct pmc_mdep *md, int cpu)
+{
+ struct pmc_cpu *pc;
+ struct pt_cpu *pt_pc;
+ u_int cp[4];
+ int ri;
+
+ dprintf("%s: cpu %d\n", __func__, cpu);
+
+ /* We rely on XSAVE support */
+ if ((cpu_feature2 & CPUID2_XSAVE) == 0) {
+ printf("Intel PT: XSAVE is not supported\n");
+ return (ENXIO);
+ }
+
+ cpuid_count(0xd, 0x0, cp);
+ if ((cp[0] & pt_xsave_mask) != pt_xsave_mask) {
+ printf("Intel PT: CPU0 does not support X87 or SSE: %x", cp[0]);
+ return (ENXIO);
+ }
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & (1 << 0)) == 0) {
+ printf("Intel PT: XSAVE compaction is not supported\n");
+ return (ENXIO);
+ }
+
+ if ((cp[0] & (1 << 3)) == 0) {
+ printf("Intel PT: XSAVES/XRSTORS are not supported\n");
+ return (ENXIO);
+ }
+
+ /* Enable XSAVE */
+ load_cr4(rcr4() | CR4_XSAVE);
+
+ KASSERT(cpu == PCPU_GET(cpuid), ("Init on wrong CPU\n"));
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal cpu %d", __LINE__, cpu));
+ KASSERT(pt_pcpu, ("[pt,%d] null pcpu", __LINE__));
+ KASSERT(pt_pcpu[cpu] == NULL, ("[pt,%d] non-null per-cpu",
+ __LINE__));
+
+ pt_pc = malloc(sizeof(struct pt_cpu), M_PT, M_WAITOK | M_ZERO);
+
+ pt_pc->tc_hw.phw_state = PMC_PHW_FLAG_IS_ENABLED |
+ PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
+ PMC_PHW_FLAG_IS_SHAREABLE;
+
+ pt_pcpu[cpu] = pt_pc;
+
+ ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT].pcd_ri;
+
+ KASSERT(pmc_pcpu, ("[pt,%d] null generic pcpu", __LINE__));
+
+ pc = pmc_pcpu[cpu];
+
+ KASSERT(pc, ("[pt,%d] null generic per-cpu", __LINE__));
+
+ pc->pc_hwpmcs[ri] = &pt_pc->tc_hw;
+
+ pt_enumerate(pt_pc);
+
+ return (0);
+}
+
+static int
+pt_pcpu_fini(struct pmc_mdep *md, int cpu)
+{
+ int ri;
+ struct pmc_cpu *pc;
+ struct pt_cpu *pt_pc;
+
+ dprintf("%s: cpu %d\n", __func__, cpu);
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal cpu %d", __LINE__, cpu));
+ KASSERT(pt_pcpu[cpu] != NULL, ("[pt,%d] null pcpu", __LINE__));
+
+ pt_pc = pt_pcpu[cpu];
+
+ free(pt_pcpu[cpu], M_PT);
+ pt_pcpu[cpu] = NULL;
+
+ ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT].pcd_ri;
+
+ pc = pmc_pcpu[cpu];
+ pc->pc_hwpmcs[ri] = NULL;
+
+ return (0);
+}
+
+static int
+pt_trace_config(int cpu, int ri, struct pmc *pm,
+ uint64_t *ranges, uint32_t nranges)
+{
+ struct pt_cpu *pt_pc;
+ uint64_t reg;
+
+ dprintf("%s\n", __func__);
+
+ pt_pc = pt_pcpu[cpu];
+
+ KASSERT(cpu == PCPU_GET(cpuid), ("Configuring wrong CPU\n"));
+
+ /* Ensure tracing is turned off */
+ reg = rdmsr(MSR_IA32_RTIT_CTL);
+ if (reg & RTIT_CTL_TRACEEN)
+ pt_save_restore(pt_pc, true);
+
+ pt_configure_ranges(pt_pc, ranges, nranges);
+
+ return (0);
+}
+
+static int
+pt_read_trace(int cpu, int ri, struct pmc *pm,
+ pmc_value_t *cycle, pmc_value_t *voffset)
+{
+ struct pt_ext_area *pt_ext;
+ struct pt_save_area *save_area;
+ struct pmc_md_pt_pmc *pm_pt;
+ struct pt_buffer *pt_buf;
+ struct pt_cpu *pt_pc;
+ uint64_t offset;
+ uint64_t reg;
+ uint32_t idx;
+
+ pt_pc = pt_pcpu[cpu];
+ pt_pc->pm_mmap = pm;
+
+ pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+ pt_buf = &pm_pt->pt_buffers[cpu];
+
+ save_area = &pt_pc->save_area;
+ pt_ext = &save_area->pt_ext_area;
+
+ reg = rdmsr(MSR_IA32_RTIT_CTL);
+ if (reg & RTIT_CTL_TRACEEN)
+ reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS);
+ else
+ reg = pt_ext->rtit_output_mask_ptrs;
+
+ idx = (reg & 0xffffffff) >> 7;
+ *cycle = pt_buf->cycle;
+
+ offset = reg >> 32;
+ *voffset = pt_buf->topa_sw[idx].offset + offset;
+
+ dprintf("%s: %lx\n", __func__, rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+ dprintf("%s: cycle %ld offset %ld\n", __func__, pt_buf->cycle, offset);
+
+ return (0);
+}
+
+static int
+pt_read_pmc(int cpu, int ri, pmc_value_t *v)
+{
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal ri %d", __LINE__, ri));
+
+ *v = 0;
+
+ return (0);
+}
+
+static int
+pt_release_pmc(int cpu, int ri, struct pmc *pm)
+{
+ struct pmc_md_pt_pmc *pm_pt;
+ struct pt_cpu *pt_pc;
+ enum pmc_mode mode;
+ struct pmc_hw *phw;
+ int i;
+
+ pm_pt = (struct pmc_md_pt_pmc *)&pm->pm_md;
+ pt_pc = pt_pcpu[cpu];
+
+ dprintf("%s: cpu %d (curcpu %d)\n", __func__, cpu, PCPU_GET(cpuid));
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri == 0,
+ ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ phw = &pt_pcpu[cpu]->tc_hw;
+ phw->phw_pmc = NULL;
+
+ dprintf("%s: cpu %d, output base %lx\n",
+ __func__, cpu, rdmsr(MSR_IA32_RTIT_OUTPUT_BASE));
+ dprintf("%s: cpu %d, output base ptr %lx\n",
+ __func__, cpu, rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+
+ mode = PMC_TO_MODE(pm);
+ if (mode == PMC_MODE_TT)
+ for (i = 0; i < pmc_cpu_max(); i++)
+ pt_buffer_deallocate(i, &pm_pt->pt_buffers[i]);
+ else
+ pt_buffer_deallocate(cpu, &pm_pt->pt_buffers[cpu]);
+
+ if (mode == PMC_MODE_ST)
+ pt_pc->flags &= ~FLAG_PT_ALLOCATED;
+
+ return (0);
+}
+
+static int
+pt_start_pmc(int cpu, int ri)
+{
+ struct pt_cpu *pt_pc;
+ struct pmc_hw *phw;
+
+ dprintf("%s: cpu %d (curcpu %d)\n", __func__, cpu, PCPU_GET(cpuid));
+
+ pt_pc = pt_pcpu[cpu];
+ phw = &pt_pc->tc_hw;
+ if (phw->phw_pmc == NULL)
+ return (-1);
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ pt_save_restore(pt_pc, false);
+
+ return (0);
+}
+
+static int
+pt_stop_pmc(int cpu, int ri)
+{
+ struct pt_cpu *pt_pc;
+
+ pt_pc = pt_pcpu[cpu];
+
+ dprintf("%s: cpu %d, output base %lx, ptr %lx\n", __func__, cpu,
+ rdmsr(MSR_IA32_RTIT_OUTPUT_BASE),
+ rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ /*
+ * Save the PT state to memory.
+ * This operation will disable tracing.
+ */
+ pt_save_restore(pt_pc, true);
+
+ return (0);
+}
+
+static int
+pt_write_pmc(int cpu, int ri, pmc_value_t v)
+{
+
+ KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+ ("[pt,%d] illegal CPU value %d", __LINE__, cpu));
+ KASSERT(ri == 0, ("[pt,%d] illegal row-index %d", __LINE__, ri));
+
+ return (0);
+}
+
+int
+pmc_pt_initialize(struct pmc_mdep *md, int maxcpu)
+{
+ struct pmc_classdep *pcd;
+
+ dprintf("%s\n", __func__);
+
+ pt_xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+
+ KASSERT(md != NULL, ("[pt,%d] md is NULL", __LINE__));
+ KASSERT(md->pmd_nclass >= 1, ("[pt,%d] dubious md->nclass %d",
+ __LINE__, md->pmd_nclass));
+
+ pt_pcpu = malloc(sizeof(struct pt_cpu *) * maxcpu, M_PT,
+ M_WAITOK | M_ZERO);
+
+ pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT];
+
+ pcd->pcd_caps = PT_CAPS;
+ pcd->pcd_class = PMC_CLASS_PT;
+ pcd->pcd_num = PT_NPMCS;
+ pcd->pcd_ri = md->pmd_npmc;
+ pcd->pcd_width = 64;
+
+ pcd->pcd_allocate_pmc = pt_allocate_pmc;
+ pcd->pcd_config_pmc = pt_config_pmc;
+ pcd->pcd_describe = pt_describe;
+ pcd->pcd_get_config = pt_get_config;
+ pcd->pcd_pcpu_init = pt_pcpu_init;
+ pcd->pcd_pcpu_fini = pt_pcpu_fini;
+ pcd->pcd_read_pmc = pt_read_pmc;
+ pcd->pcd_read_trace = pt_read_trace;
+ pcd->pcd_trace_config = pt_trace_config;
+ pcd->pcd_release_pmc = pt_release_pmc;
+ pcd->pcd_start_pmc = pt_start_pmc;
+ pcd->pcd_stop_pmc = pt_stop_pmc;
+ pcd->pcd_write_pmc = pt_write_pmc;
+
+ md->pmd_npmc += PT_NPMCS;
+
+ return (0);
+}
+
+void
+pmc_pt_finalize(struct pmc_mdep *md)
+{
+
+ dprintf("%s\n", __func__);
+
+#ifdef INVARIANTS
+ int i, ncpus;
+
+ ncpus = pmc_cpu_max();
+ for (i = 0; i < ncpus; i++)
+ KASSERT(pt_pcpu[i] == NULL, ("[pt,%d] non-null pcpu cpu %d",
+ __LINE__, i));
+
+ KASSERT(md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PT].pcd_class ==
+ PMC_CLASS_PT, ("[pt,%d] class mismatch", __LINE__));
+#endif
+
+ free(pt_pcpu, M_PT);
+ pt_pcpu = NULL;
+}
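
To make the read path concrete, a hedged consumer-side sketch: it maps the per-CPU buffer through the /dev/pmc%d node created in hwpmc_vm.c below and advances a cursor using pmc_read_trace(). The 128 MB size mirrors bufsize in pt_buffer_allocate(), handle_chunk() is a hypothetical callback, and the mapping must be made by the thread that allocated the PMC (pmc_mmap_single() matches on curthread):

#include <sys/mman.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <pmc.h>

#define	PT_BUFSIZE	(128UL * 1024 * 1024)	/* matches pt_buffer_allocate() */

static int
drain_once(int cpu, pmc_id_t pmcid, uint64_t *last,
    void (*handle_chunk)(const uint8_t *, size_t))	/* hypothetical callback */
{
	pmc_value_t cycle, offset;
	char path[32];
	uint8_t *buf;
	int fd;

	snprintf(path, sizeof(path), "/dev/pmc%d", cpu);
	if ((fd = open(path, O_RDONLY)) < 0)
		return (-1);
	buf = mmap(NULL, PT_BUFSIZE, PROT_READ, MAP_SHARED, fd, 0);
	close(fd);
	if (buf == MAP_FAILED)
		return (-1);

	if (pmc_read_trace(cpu, pmcid, &cycle, &offset) == 0 &&
	    offset > *last) {
		/* New packet bytes live in [*last, offset); 'cycle' counts wraps. */
		handle_chunk(buf + *last, offset - *last);
		*last = offset;
	}

	munmap(buf, PT_BUFSIZE);
	return (0);
}
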
Index: sys/dev/hwpmc/hwpmc_vm.h
===================================================================
--- sys/dev/hwpmc/hwpmc_vm.h
+++ sys/dev/hwpmc/hwpmc_vm.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DEV_HWPMC_VM_H_
+#define _DEV_HWPMC_VM_H_
+
+int pmc_vm_initialize(struct pmc_mdep *md);
+int pmc_vm_finalize(void);
+
+struct pmc_vm_map {
+ TAILQ_ENTRY(pmc_vm_map) map_next;
+ struct thread *t;
+ vm_object_t obj;
+ void *buf;
+};
+
+struct cdev_cpu {
+ struct pmc_mdep *md;
+ struct mtx vm_mtx;
+ TAILQ_HEAD(, pmc_vm_map) pmc_maplist;
+ uint32_t cpu;
+};
+
+#endif /* !_DEV_HWPMC_VM_H_ */
Index: sys/dev/hwpmc/hwpmc_vm.c
===================================================================
--- sys/dev/hwpmc/hwpmc_vm.c
+++ sys/dev/hwpmc/hwpmc_vm.c
@@ -0,0 +1,134 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmckern.h>
+#include <sys/conf.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#define PMC_VM_DEBUG
+#undef PMC_VM_DEBUG
+
+#ifdef PMC_VM_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+#include "hwpmc_vm.h"
+
+struct cdev *pmc_cdev[MAXCPU];
+
+static int
+pmc_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
+ vm_size_t mapsize, struct vm_object **objp, int nprot)
+{
+ struct pmc_vm_map *map, *map_tmp;
+ struct cdev_cpu *cc;
+
+ cc = cdev->si_drv1;
+
+ if (nprot != PROT_READ || *offset != 0)
+ return (ENXIO);
+
+ mtx_lock(&cc->vm_mtx);
+ TAILQ_FOREACH_SAFE(map, &cc->pmc_maplist, map_next, map_tmp) {
+ if (map->t == curthread) {
+ mtx_unlock(&cc->vm_mtx);
+ *objp = map->obj;
+ return (0);
+ }
+ }
+ mtx_unlock(&cc->vm_mtx);
+
+ return (ENXIO);
+}
+
+static struct cdevsw pmc_cdevsw = {
+ .d_version = D_VERSION,
+ .d_mmap_single = pmc_mmap_single,
+ .d_name = "HWPMC",
+};
+
+int
+pmc_vm_initialize(struct pmc_mdep *md)
+{
+ unsigned int maxcpu;
+ struct cdev_cpu *cc;
+ int cpu;
+
+ maxcpu = pmc_cpu_max();
+
+ for (cpu = 0; cpu < maxcpu; cpu++) {
+ cc = malloc(sizeof(struct cdev_cpu), M_PMC, M_WAITOK | M_ZERO);
+ cc->cpu = cpu;
+ cc->md = md;
+ mtx_init(&cc->vm_mtx, "PMC VM", NULL, MTX_DEF);
+ TAILQ_INIT(&cc->pmc_maplist);
+
+ pmc_cdev[cpu] = make_dev(&pmc_cdevsw, 0, UID_ROOT, GID_WHEEL,
+ 0666, "pmc%d", cpu);
+ pmc_cdev[cpu]->si_drv1 = cc;
+ }
+
+ return (0);
+}
+
+int
+pmc_vm_finalize(void)
+{
+ unsigned int maxcpu;
+ struct cdev_cpu *cc;
+ int cpu;
+
+ maxcpu = pmc_cpu_max();
+
+ for (cpu = 0; cpu < maxcpu; cpu++) {
+ cc = pmc_cdev[cpu]->si_drv1;
+ mtx_destroy(&cc->vm_mtx);
+ free(cc, M_PMC);
+ destroy_dev(pmc_cdev[cpu]);
+ }
+
+ return (0);
+}
Index: sys/dev/hwpmc/pmc_events.h
===================================================================
--- sys/dev/hwpmc/pmc_events.h
+++ sys/dev/hwpmc/pmc_events.h
@@ -4843,6 +4843,13 @@
#define PMC_EV_TSC_FIRST PMC_EV_TSC_TSC
#define PMC_EV_TSC_LAST PMC_EV_TSC_TSC
+/* Intel PT */
+#define __PMC_EV_PT() \
+ __PMC_EV(PT, PT)
+
+#define PMC_EV_PT_FIRST PMC_EV_PT_PT
+#define PMC_EV_PT_LAST PMC_EV_PT_PT
+
/*
* Software events are dynamically defined.
*/
@@ -7141,6 +7148,7 @@
* START #EVENTS DESCRIPTION
* 0 0x1000 Reserved
* 0x1000 0x0001 TSC
+ * 0x1100 0x0001 PT
* 0x2000 0x0080 AMD K7 events
* 0x2080 0x0100 AMD K8 events
* 0x10000 0x0080 INTEL architectural fixed-function events
@@ -7162,6 +7170,8 @@
#define __PMC_EVENTS() \
__PMC_EV_BLOCK(TSC, 0x01000) \
__PMC_EV_TSC() \
+ __PMC_EV_BLOCK(PT, 0x1100) \
+ __PMC_EV_PT() \
__PMC_EV_BLOCK(K7, 0x2000) \
__PMC_EV_K7() \
__PMC_EV_BLOCK(K8, 0x2080) \
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -2489,7 +2489,7 @@
if ((prot & VM_PROT_EXECUTE) != 0 && error == 0) {
pkm.pm_file = vp;
pkm.pm_address = (uintptr_t) *addr;
- PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
+ PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_MMAP, (void *) &pkm);
}
}
#endif
Index: sys/modules/hwpmc/Makefile
===================================================================
--- sys/modules/hwpmc/Makefile
+++ sys/modules/hwpmc/Makefile
@@ -6,7 +6,7 @@
KMOD= hwpmc
-SRCS= hwpmc_mod.c hwpmc_logging.c hwpmc_soft.c vnode_if.h
+SRCS= hwpmc_mod.c hwpmc_logging.c hwpmc_soft.c hwpmc_vm.c vnode_if.h
.if ${MACHINE_CPUARCH} == "aarch64"
SRCS+= hwpmc_arm64.c hwpmc_arm64_md.c
@@ -14,6 +14,7 @@
.if ${MACHINE_CPUARCH} == "amd64"
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_piv.c hwpmc_tsc.c
+SRCS+= hwpmc_pt.c
SRCS+= hwpmc_x86.c hwpmc_uncore.c
SRCS+= device_if.h bus_if.h
.endif
Index: sys/sys/pmc.h
===================================================================
--- sys/sys/pmc.h
+++ sys/sys/pmc.h
@@ -101,6 +101,7 @@
__PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \
__PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \
__PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \
+ __PMC_CPU(INTEL_KABYLAKE, 0x9A, "Intel Kabylake") \
__PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \
__PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \
__PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \
@@ -151,7 +152,8 @@
__PMC_CLASS(ARMV7, 0x10, "ARMv7") \
__PMC_CLASS(ARMV8, 0x11, "ARMv8") \
__PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \
- __PMC_CLASS(E500, 0x13, "Freescale e500 class")
+ __PMC_CLASS(E500, 0x13, "Freescale e500 class") \
+ __PMC_CLASS(PT, 0x14, "Intel PT")
enum pmc_class {
#undef __PMC_CLASS
@@ -160,7 +162,7 @@
};
#define PMC_CLASS_FIRST PMC_CLASS_TSC
-#define PMC_CLASS_LAST PMC_CLASS_E500
+#define PMC_CLASS_LAST PMC_CLASS_PT
/*
* A PMC can be in the following states:
@@ -231,7 +233,9 @@
__PMC_MODE(SS, 0) \
__PMC_MODE(SC, 1) \
__PMC_MODE(TS, 2) \
- __PMC_MODE(TC, 3)
+ __PMC_MODE(TC, 3) \
+ __PMC_MODE(ST, 4) \
+ __PMC_MODE(TT, 5)
enum pmc_mode {
#undef __PMC_MODE
@@ -245,11 +249,11 @@
#define PMC_IS_COUNTING_MODE(mode) \
((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC)
#define PMC_IS_SYSTEM_MODE(mode) \
- ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC)
+ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC || (mode) == PMC_MODE_ST)
#define PMC_IS_SAMPLING_MODE(mode) \
((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS)
#define PMC_IS_VIRTUAL_MODE(mode) \
- ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC)
+ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC || (mode) == PMC_MODE_TT)
/*
* PMC row disposition
@@ -341,7 +345,11 @@
__PMC_OP(PMCSTOP, "Stop a PMC") \
__PMC_OP(WRITELOG, "Write a cookie to the log file") \
__PMC_OP(CLOSELOG, "Close log file") \
- __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list")
+ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") \
+ __PMC_OP(LOG_KERNEL_MAP, "Log kernel mappings") \
+ __PMC_OP(THREAD_UNSUSPEND, "Thread unsuspend") \
+ __PMC_OP(TRACE_READ, "Read trace buffer pointer") \
+ __PMC_OP(TRACE_CONFIG, "Setup trace IP ranges")
enum pmc_ops {
@@ -487,7 +495,6 @@
pmc_value_t pm_value; /* new&returned value */
};
-
/*
* OP GETPMCINFO
*
@@ -513,6 +520,40 @@
struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */
};
+/*
+ * OP THREAD_UNSUSPEND
+ *
+ * Unsuspend all threads of proc.
+ */
+
+struct pmc_op_proc_unsuspend {
+ pmc_id_t pm_pmcid;
+ pid_t pm_pid;
+};
+
+/*
+ * OP TRACE_CONFIG
+ */
+
+#define PMC_FILTER_MAX_IP_RANGES 4
+
+struct pmc_op_trace_config {
+ pmc_id_t pm_pmcid;
+ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */
+ uint64_t ranges[2 * PMC_FILTER_MAX_IP_RANGES];
+ uint32_t nranges;
+};
+
+/*
+ * OP TRACE_READ
+ */
+
+struct pmc_op_trace_read {
+ pmc_id_t pm_pmcid;
+ uint32_t pm_cpu;
+ pmc_value_t pm_cycle; /* returned value */
+ pmc_value_t pm_offset; /* returned value */
+};
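+
+/*
+ * Userland reaches TRACE_CONFIG and TRACE_READ through libpmc
+ * wrappers; a minimal calling sketch, with the wrapper prototypes
+ * inferred from their uses in pmctrace(8):
+ *
+ *	uint64_t ranges[2] = { addr_start, addr_end };
+ *	pmc_value_t cycle, offset;
+ *
+ *	pmc_trace_config(cpu, pmcid, &ranges[0], 1);
+ *	pmc_read_trace(cpu, pmcid, &cycle, &offset);
+ */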
/*
* OP GETCPUINFO
@@ -520,7 +561,6 @@
* Retrieve system CPU information.
*/
-
struct pmc_classinfo {
enum pmc_class pm_class; /* class id */
uint32_t pm_caps; /* counter capabilities */
@@ -951,6 +991,12 @@
int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value);
int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value);
+ /* trace */
+ int (*pcd_read_trace)(int _cpu, int _ri, struct pmc *_pm,
+ pmc_value_t *_cycle, pmc_value_t *_offset);
+ int (*pcd_trace_config)(int _cpu, int _ri, struct pmc *_pm,
+ uint64_t *ranges, uint32_t nranges);
+
/* pmc allocation/release */
int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t,
const struct pmc_op_pmcallocate *_a);
@@ -978,7 +1024,7 @@
* Machine dependent bits needed per CPU type.
*/
-struct pmc_mdep {
+struct pmc_mdep {
uint32_t pmd_cputype; /* from enum pmc_cputype */
uint32_t pmd_npmc; /* number of PMCs per CPU */
uint32_t pmd_nclass; /* number of PMC classes present */
Index: sys/x86/include/specialreg.h
===================================================================
--- sys/x86/include/specialreg.h
+++ sys/x86/include/specialreg.h
@@ -104,6 +104,7 @@
#define XFEATURE_ENABLED_OPMASK 0x00000020
#define XFEATURE_ENABLED_ZMM_HI256 0x00000040
#define XFEATURE_ENABLED_HI16_ZMM 0x00000080
+#define XFEATURE_ENABLED_PT 0x00000100
#define XFEATURE_AVX \
(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
Index: usr.sbin/Makefile
===================================================================
--- usr.sbin/Makefile
+++ usr.sbin/Makefile
@@ -184,6 +184,7 @@
SUBDIR.${MK_PMC}+= pmccontrol
SUBDIR.${MK_PMC}+= pmcstat
SUBDIR.${MK_PMC}+= pmcstudy
+SUBDIR.${MK_PMC}+= pmctrace
SUBDIR.${MK_PORTSNAP}+= portsnap
SUBDIR.${MK_PPP}+= ppp
SUBDIR.${MK_QUOTAS}+= edquota
Index: usr.sbin/pmctrace/Makefile
===================================================================
--- usr.sbin/pmctrace/Makefile
+++ usr.sbin/pmctrace/Makefile
@@ -0,0 +1,16 @@
+# @(#)Makefile 8.1 (Berkeley) 6/6/93
+# $FreeBSD$
+
+PROG= pmctrace
+SRCS= pmctrace.c
+MAN=
+
+LIBADD= elf pmc pmcstat
+
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= pmctrace_pt.c \
+ pmctrace_pt.h
+LIBADD+= ipt
+.endif
+
+.include <bsd.prog.mk>
Index: usr.sbin/pmctrace/pmctrace.h
===================================================================
--- usr.sbin/pmctrace/pmctrace.h
+++ usr.sbin/pmctrace/pmctrace.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMCTRACE_H_
+#define _PMCTRACE_H_
+
+struct mtrace_data {
+ uint64_t ip;
+ int cpu;
+ struct pmcstat_process *pp;
+ uint32_t flags;
+};
+
+struct trace_cpu {
+ uint32_t cycle;
+ uint64_t offset;
+ struct mtrace_data mdata;
+ uint32_t bufsize;
+ void *base;
+ int fd;
+};
+
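+/*
+ * Per-backend dispatch: pmctrace matches the event spec against the
+ * trace_devs[] table by prefix and calls the backend's process
+ * routine.  A second backend would plug in as, e.g. (hypothetical
+ * name and handler; only "pt" exists in this patch):
+ *
+ *	{ "coresight", coresight_process },
+ */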
+struct trace_dev {
+ const char *ev_spec;
+ int (*process)(struct trace_cpu *, struct pmcstat_process *,
+ uint32_t cpu, uint32_t cycle, uint64_t offset, uint32_t flags);
+};
+
+struct pmctrace_config {
+ struct trace_dev *trace_dev;
+ uint32_t flags;
+#define FLAG_BRANCH_TNT (1 << 0) /* Taken/Not Taken */
+};
+
+#endif /* !_PMCTRACE_H_ */
Index: usr.sbin/pmctrace/pmctrace.c
===================================================================
--- usr.sbin/pmctrace/pmctrace.c
+++ usr.sbin/pmctrace/pmctrace.c
@@ -0,0 +1,659 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/event.h>
+#include <sys/cpuset.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/sysctl.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/signal.h>
+#include <sys/types.h>
+
+#include <assert.h>
+#include <signal.h>
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <libgen.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <sysexits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <libpmcstat.h>
+
+#include "pmctrace.h"
+#if defined(__amd64__)
+#include "pmctrace_pt.h"
+#endif
+
+#define MAX_CPU 4096
+
+#define PMCTRACE_DEBUG
+#undef PMCTRACE_DEBUG
+
+#ifdef PMCTRACE_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+static struct pmcstat_args args;
+static struct kevent kev;
+static struct pmcstat_process *pmcstat_kernproc;
+static struct pmcstat_stats pmcstat_stats;
+static struct trace_cpu *trace_cpus[MAX_CPU];
+static struct pmc_plugins plugins[] = {};
+
+static int pmcstat_sockpair[NSOCKPAIRFD];
+static int pmcstat_kq;
+static int pmcstat_npmcs;
+static int pmcstat_mergepmc;
+static int ps_samples_period;
+
+struct pmcstat_image_hash_list pmcstat_image_hash[PMCSTAT_NHASH];
+struct pmcstat_process_hash_list pmcstat_process_hash[PMCSTAT_NHASH];
+struct pmcstat_pmcs pmcstat_pmcs = LIST_HEAD_INITIALIZER(pmcstat_pmcs);
+
+static struct trace_dev trace_devs[] = {
+#if defined(__amd64__)
+	{ "pt", ipt_process },
+#endif
+	{ NULL, NULL }
+};
+
+static struct pmctrace_config pmctrace_cfg;
+
+static int
+pmctrace_ncpu(void)
+{
+ size_t ncpu_size;
+ int error;
+ int ncpu;
+
+ ncpu_size = sizeof(ncpu);
+ error = sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);
+ if (error)
+ return (-1);
+
+ return (ncpu);
+}
+
+static int
+pmctrace_init_cpu(uint32_t cpu)
+{
+ struct trace_cpu *tc;
+ char filename[16];
+ struct mtrace_data *mdata;
+
+ tc = trace_cpus[cpu];
+ mdata = &tc->mdata;
+ mdata->ip = 0;
+ mdata->cpu = cpu;
+
+ sprintf(filename, "/dev/pmc%d", cpu);
+
+ tc->fd = open(filename, O_RDWR);
+ if (tc->fd < 0) {
+ printf("Can't open %s\n", filename);
+ return (-1);
+ }
+
+ tc->bufsize = 128 * 1024 * 1024;
+ tc->cycle = 0;
+ tc->offset = 0;
+
+ tc->base = mmap(NULL, tc->bufsize, PROT_READ, MAP_SHARED, tc->fd, 0);
+ if (tc->base == MAP_FAILED) {
+ printf("mmap failed: err %d\n", errno);
+ return (-1);
+ }
+ dprintf("%s: tc->base %lx\n", __func__, *(uint64_t *)tc->base);
+
+ return (0);
+}
+
+static int
+pmctrace_process_cpu(int cpu, struct pmcstat_ev *ev)
+{
+ struct pmcstat_process *pp;
+ struct pmcstat_target *pt;
+ pmc_value_t offset;
+ pmc_value_t cycle;
+ struct trace_cpu *tc;
+ struct trace_dev *trace_dev;
+
+ trace_dev = pmctrace_cfg.trace_dev;
+ tc = trace_cpus[cpu];
+
+ pmc_read_trace(cpu, ev->ev_pmcid, &cycle, &offset);
+
+ dprintf("cpu %d cycle %lx offset %lx\n", cpu, cycle, offset);
+
+ pt = SLIST_FIRST(&args.pa_targets);
+ if (pt != NULL)
+ pp = pmcstat_process_lookup(pt->pt_pid, 0);
+ else
+ pp = pmcstat_kernproc;
+
+ if (pp)
+ trace_dev->process(tc, pp, cpu, cycle,
+ offset, pmctrace_cfg.flags);
+ else
+ dprintf("pp not found\n");
+
+ return (0);
+}
+
+static int
+pmctrace_process_all(int user_mode)
+{
+ struct pmcstat_ev *ev;
+ int ncpu;
+ int i;
+
+ ncpu = pmctrace_ncpu();
+ if (ncpu < 0)
+		errx(EX_SOFTWARE, "ERROR: Can't get cpus");
+
+ if (user_mode) {
+ ev = STAILQ_FIRST(&args.pa_events);
+ for (i = 0; i < ncpu; i++)
+ pmctrace_process_cpu(i, ev);
+ } else
+ STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+ pmctrace_process_cpu(ev->ev_cpu, ev);
+
+ return (0);
+}
+
+static void
+pmctrace_cleanup(void)
+{
+ struct pmcstat_ev *ev;
+
+ /* release allocated PMCs. */
+ STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+ if (ev->ev_pmcid != PMC_ID_INVALID) {
+ if (pmc_stop(ev->ev_pmcid) < 0)
+ err(EX_OSERR,
+ "ERROR: cannot stop pmc 0x%x \"%s\"",
+ ev->ev_pmcid, ev->ev_name);
+ if (pmc_release(ev->ev_pmcid) < 0)
+ err(EX_OSERR,
+ "ERROR: cannot release pmc 0x%x \"%s\"",
+ ev->ev_pmcid, ev->ev_name);
+ }
+
+ /* de-configure the log file if present. */
+ if (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE))
+ (void) pmc_configure_logfile(-1);
+
+ if (args.pa_logparser) {
+ pmclog_close(args.pa_logparser);
+ args.pa_logparser = NULL;
+ }
+
+ pmcstat_shutdown_logging(&args, plugins, &pmcstat_stats);
+}
+
+static void
+pmctrace_start_pmcs(void)
+{
+ struct pmcstat_ev *ev;
+
+ STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
+ dprintf("starting ev->ev_cpu %d\n", ev->ev_cpu);
+ assert(ev->ev_pmcid != PMC_ID_INVALID);
+ if (pmc_start(ev->ev_pmcid) < 0) {
+ warn("ERROR: Cannot start pmc 0x%x \"%s\"",
+ ev->ev_pmcid, ev->ev_name);
+ pmctrace_cleanup();
+ exit(EX_OSERR);
+ }
+ }
+}
+
+static int
+pmctrace_open_logfile(void)
+{
+ int pipefd[2];
+
+ /*
+ * process the log on the fly by reading it in
+ * through a pipe.
+ */
+ if (pipe(pipefd) < 0)
+ err(EX_OSERR, "ERROR: pipe(2) failed");
+
+ if (fcntl(pipefd[READPIPEFD], F_SETFL, O_NONBLOCK) < 0)
+ err(EX_OSERR, "ERROR: fcntl(2) failed");
+
+ EV_SET(&kev, pipefd[READPIPEFD], EVFILT_READ, EV_ADD,
+ 0, 0, NULL);
+
+ if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+ err(EX_OSERR, "ERROR: Cannot register kevent");
+
+ args.pa_logfd = pipefd[WRITEPIPEFD];
+ args.pa_flags |= FLAG_HAS_PIPE;
+ args.pa_logparser = pmclog_open(pipefd[READPIPEFD]);
+
+ if (pmc_configure_logfile(args.pa_logfd) < 0)
+ err(EX_OSERR, "ERROR: Cannot configure log file");
+
+ return (0);
+}
+
+static int
+pmctrace_find_kernel(void)
+{
+ struct stat sb;
+ char buffer[PATH_MAX];
+ size_t len;
+ char *tmp;
+
+ /* Default to using the running system kernel. */
+ len = 0;
+ if (sysctlbyname("kern.bootfile", NULL, &len, NULL, 0) == -1)
+ err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
+ args.pa_kernel = malloc(len);
+ if (args.pa_kernel == NULL)
+ errx(EX_SOFTWARE, "ERROR: Out of memory.");
+ if (sysctlbyname("kern.bootfile", args.pa_kernel, &len, NULL, 0) == -1)
+ err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
+
+ /*
+ * Check if 'kerneldir' refers to a file rather than a
+ * directory. If so, use `dirname path` to determine the
+ * kernel directory.
+ */
+ (void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot,
+ args.pa_kernel);
+ if (stat(buffer, &sb) < 0)
+ err(EX_OSERR, "ERROR: Cannot locate kernel \"%s\"",
+ buffer);
+ if (!S_ISREG(sb.st_mode) && !S_ISDIR(sb.st_mode))
+ errx(EX_USAGE, "ERROR: \"%s\": Unsupported file type.",
+ buffer);
+ if (!S_ISDIR(sb.st_mode)) {
+ tmp = args.pa_kernel;
+ args.pa_kernel = strdup(dirname(args.pa_kernel));
+ if (args.pa_kernel == NULL)
+ errx(EX_SOFTWARE, "ERROR: Out of memory");
+ free(tmp);
+ (void) snprintf(buffer, sizeof(buffer), "%s%s",
+ args.pa_fsroot, args.pa_kernel);
+ if (stat(buffer, &sb) < 0)
+ err(EX_OSERR, "ERROR: Cannot stat \"%s\"",
+ buffer);
+ if (!S_ISDIR(sb.st_mode))
+ errx(EX_USAGE,
+ "ERROR: \"%s\" is not a directory.",
+ buffer);
+ }
+
+ return (0);
+}
+
+static void
+pmctrace_setup_cpumask(cpuset_t *cpumask)
+{
+ cpuset_t rootmask;
+
+ /*
+ * The initial CPU mask specifies the root mask of this process
+ * which is usually all CPUs in the system.
+ */
+ if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
+ sizeof(rootmask), &rootmask) == -1)
+ err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs");
+ CPU_COPY(&rootmask, cpumask);
+}
+
+static int
+pmctrace_delayed_start(bool user_mode, char *func_name, char *func_image)
+{
+ uint64_t ranges[2];
+ struct pmcstat_symbol *sym;
+ struct pmcstat_target *pt;
+ struct pmcstat_process *pp;
+ struct pmcstat_ev *ev;
+ uintptr_t addr_start;
+ uintptr_t addr_end;
+ int ncpu;
+ int i;
+
+ if (func_name == NULL || func_image == NULL)
+ return (0);
+
+ ncpu = pmctrace_ncpu();
+ if (ncpu < 0)
+		errx(EX_SOFTWARE, "ERROR: Can't get cpus");
+
+ if (user_mode) {
+ pt = SLIST_FIRST(&args.pa_targets);
+ if (pt == NULL)
+ errx(EX_SOFTWARE, "ERROR: can't get target.");
+ pp = pmcstat_process_lookup(pt->pt_pid, 0);
+ if (pp == NULL)
+			errx(EX_SOFTWARE, "ERROR: pp is NULL, pid %d",
+ (uint32_t)pt->pt_pid);
+ } else
+ pp = pmcstat_kernproc;
+
+ sym = pmcstat_symbol_search_by_name(pp, func_image, func_name,
+ &addr_start, &addr_end);
+ if (!sym)
+ return (0);
+
+ dprintf("%s: SYM addr start %lx end %lx\n",
+ __func__, addr_start, addr_end);
+
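+	/* Restrict tracing to the address range of the requested function. */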
+ ranges[0] = addr_start;
+ ranges[1] = addr_end;
+
+ if (user_mode) {
+ ev = STAILQ_FIRST(&args.pa_events);
+ for (i = 0; i < ncpu; i++)
+ pmc_trace_config(i, ev->ev_pmcid, &ranges[0], 1);
+ } else {
+ STAILQ_FOREACH(ev, &args.pa_events, ev_next)
+ pmc_trace_config(ev->ev_cpu,
+ ev->ev_pmcid, &ranges[0], 1);
+ }
+
+ pmctrace_start_pmcs();
+
+ return (1);
+}
+
+static int
+pmctrace_run(bool user_mode, char *func_name, char *func_image)
+{
+ struct pmcstat_target *pt;
+ struct pmcstat_process *pp;
+ struct pmcstat_ev *ev;
+ int stopping;
+ int running;
+ int started;
+ int c;
+
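+	/*
+	 * After the target process exits, keep draining the trace for a
+	 * few more timer ticks before shutting down.
+	 */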
+ stopping = 0;
+ running = 10;
+ started = 0;
+
+ if (user_mode) {
+ pmcstat_create_process(pmcstat_sockpair, &args, pmcstat_kq);
+ pmcstat_attach_pmcs(&args);
+ if (func_name == NULL || func_image == NULL) {
+ pmctrace_start_pmcs();
+ started = 1;
+ }
+ pmcstat_start_process(pmcstat_sockpair);
+ } else {
+ if (func_name == NULL || func_image == NULL) {
+ pmctrace_start_pmcs();
+ started = 1;
+ } else {
+			STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
+ pmc_log_kmap(ev->ev_pmcid);
+ }
+ }
+ }
+
+ do {
+ if ((c = kevent(pmcstat_kq, NULL, 0, &kev, 1, NULL)) <= 0) {
+ if (errno != EINTR)
+ err(EX_OSERR, "ERROR: kevent failed");
+ else
+ continue;
+ }
+
+ dprintf("%s: pmcstat event: filter %d, ident %ld\n",
+ __func__, kev.filter, kev.ident);
+
+ if (kev.flags & EV_ERROR)
+ errc(EX_OSERR, kev.data, "ERROR: kevent failed");
+
+ switch (kev.filter) {
+ case EVFILT_PROC:
+ stopping = 1;
+ break;
+ case EVFILT_READ:
+ args.pa_flags |= FLAG_DO_ANALYSIS;
+ pmcstat_analyze_log(&args, plugins, &pmcstat_stats,
+ pmcstat_kernproc, pmcstat_mergepmc, &pmcstat_npmcs,
+ &ps_samples_period);
+
+ if (started == 0 &&
+ pmctrace_delayed_start(user_mode, func_name, func_image) == 1)
+ started = 1;
+
+ if (user_mode) {
+ pt = SLIST_FIRST(&args.pa_targets);
+ ev = STAILQ_FIRST(&args.pa_events);
+ pmc_proc_unsuspend(ev->ev_pmcid, pt->pt_pid);
+ }
+
+ break;
+ case EVFILT_TIMER:
+ pmc_flush_logfile();
+
+ pp = pmcstat_kernproc;
+ if (!user_mode && TAILQ_EMPTY(&pp->pp_map))
+ break;
+
+ pmctrace_process_all(user_mode);
+
+ if (stopping)
+ running -= 1;
+ break;
+ }
+ } while (running > 0);
+
+ return (0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ struct pmcstat_ev *ev;
+ bool user_mode;
+ bool supervisor_mode;
+ int option;
+ cpuset_t cpumask;
+ char *func_name;
+ char *func_image;
+ int ncpu;
+ int i;
+
+ bzero(&args, sizeof(struct pmcstat_args));
+ bzero(&pmctrace_cfg, sizeof(struct pmctrace_config));
+
+ func_name = NULL;
+ func_image = NULL;
+
+ user_mode = 0;
+ supervisor_mode = 0;
+
+ STAILQ_INIT(&args.pa_events);
+ SLIST_INIT(&args.pa_targets);
+ CPU_ZERO(&cpumask);
+
+ args.pa_fsroot = strdup("/"); /* TODO */
+
+ pmctrace_find_kernel();
+ pmctrace_setup_cpumask(&cpumask);
+
+ while ((option = getopt(argc, argv,
+ "tu:s:i:f:")) != -1)
+ switch (option) {
+ case 't':
+ /*
+ * Decode 'Taken/Not_Taken branch' packet.
+ * TODO: Intel PT only?
+ */
+ pmctrace_cfg.flags |= FLAG_BRANCH_TNT;
+ break;
+ case 'i':
+ func_image = strdup(optarg);
+ break;
+ case 'f':
+ func_name = strdup(optarg);
+ break;
+ case 'u':
+ case 's':
+ if ((ev = malloc(sizeof(struct pmcstat_ev))) == NULL)
+ errx(EX_SOFTWARE, "ERROR: Out of memory.");
+ if (option == 'u') {
+ user_mode = 1;
+ ev->ev_mode = PMC_MODE_TT;
+ args.pa_flags |= FLAG_HAS_PROCESS_PMCS;
+ } else {
+ ev->ev_mode = PMC_MODE_ST;
+ supervisor_mode = 1;
+ }
+
+ ev->ev_spec = strdup(optarg);
+ if (ev->ev_spec == NULL)
+ errx(EX_SOFTWARE, "ERROR: Out of memory.");
+ break;
+ default:
+ break;
+	}
+
+ if ((user_mode == 0 && supervisor_mode == 0) ||
+ (user_mode == 1 && supervisor_mode == 1))
+ errx(EX_USAGE, "ERROR: specify -u or -s");
+
+ if ((func_image == NULL && func_name != NULL) ||
+ (func_image != NULL && func_name == NULL))
+		errx(EX_USAGE, "ERROR: specify both -i and -f, or neither");
+
+ for (i = 0; trace_devs[i].ev_spec != NULL; i++) {
+ if (strncmp(trace_devs[i].ev_spec, ev->ev_spec,
+ strlen(trace_devs[i].ev_spec)) == 0) {
+ /* found */
+ pmctrace_cfg.trace_dev = &trace_devs[i];
+ break;
+ }
+ }
+
+ if (pmctrace_cfg.trace_dev == NULL)
+ errx(EX_SOFTWARE, "ERROR: trace device not found");
+
+ args.pa_argc = (argc -= optind);
+ args.pa_argv = (argv += optind);
+ args.pa_cpumask = cpumask;
+
+ if (user_mode && !argc)
+		errx(EX_USAGE, "ERROR: user mode requires a command to be specified");
+	if (supervisor_mode && argc)
+		errx(EX_USAGE, "ERROR: supervisor mode does not take a command");
+
+ args.pa_required |= (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE);
+
+ ev->ev_saved = 0LL;
+ ev->ev_pmcid = PMC_ID_INVALID;
+ ev->ev_name = strdup("pmctrace");
+ ev->ev_flags = 0;
+
+ if (!user_mode)
+ ev->ev_cpu = CPU_FFS(&cpumask) - 1;
+ else
+ ev->ev_cpu = PMC_CPU_ANY;
+
+ STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next);
+
+ if (!user_mode) {
+ CPU_CLR(ev->ev_cpu, &cpumask);
+ pmcstat_clone_event_descriptor(ev, &cpumask, &args);
+ CPU_SET(ev->ev_cpu, &cpumask);
+ }
+
+ ncpu = pmctrace_ncpu();
+ if (ncpu < 0)
+		errx(EX_SOFTWARE, "ERROR: Can't get cpus");
+
+ if (pmc_init() < 0)
+ err(EX_UNAVAILABLE, "ERROR: Initialization of the pmc(3) library failed");
+
+ if ((pmcstat_kq = kqueue()) < 0)
+ err(EX_OSERR, "ERROR: Cannot allocate kqueue");
+
+ pmctrace_open_logfile();
+
+ STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
+ if (pmc_allocate(ev->ev_spec, ev->ev_mode,
+ ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0)
+ err(EX_OSERR,
+ "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"",
+ PMC_IS_SYSTEM_MODE(ev->ev_mode) ?
+ "system" : "process", ev->ev_spec);
+ }
+
+ for (i = 0; i < ncpu; i++) {
+ trace_cpus[i] = malloc(sizeof(struct trace_cpu));
+ pmctrace_init_cpu(i);
+ }
+
+ EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, 100, NULL);
+
+ if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+ err(EX_OSERR, "ERROR: Cannot register kevent for timer");
+
+ pmcstat_initialize_logging(&pmcstat_kernproc,
+ &args, plugins, &pmcstat_npmcs, &pmcstat_mergepmc);
+
+ pmctrace_run(user_mode, func_name, func_image);
+
+ return (0);
+}
Index: usr.sbin/pmctrace/pmctrace_pt.h
===================================================================
--- usr.sbin/pmctrace/pmctrace_pt.h
+++ usr.sbin/pmctrace/pmctrace_pt.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMCTRACE_PT_H_
+#define _PMCTRACE_PT_H_
+
+int ipt_process(struct trace_cpu *cc, struct pmcstat_process *pp,
+ uint32_t cpu, uint32_t cycle, uint64_t offset, uint32_t flags);
+
+#endif /* !_PMCTRACE_PT_H_ */
Index: usr.sbin/pmctrace/pmctrace_pt.c
===================================================================
--- usr.sbin/pmctrace/pmctrace_pt.c
+++ usr.sbin/pmctrace/pmctrace_pt.c
@@ -0,0 +1,349 @@
+/*-
+ * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by BAE Systems, the University of Cambridge
+ * Computer Laboratory, and Memorial University under DARPA/AFRL contract
+ * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
+ * (TC) research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <sys/ttycom.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <curses.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <kvm.h>
+#include <libgen.h>
+#include <limits.h>
+#include <math.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <gelf.h>
+#include <inttypes.h>
+
+#include <libpmcstat.h>
+
+#include "pmctrace.h"
+#include "pmctrace_pt.h"
+
+#include <libipt/pt_cpu.h>
+#include <libipt/pt_last_ip.h>
+#include <libipt/pt_time.h>
+#include <libipt/pt_compiler.h>
+#include <libipt/intel-pt.h>
+
+#define PMCTRACE_PT_DEBUG
+#undef PMCTRACE_PT_DEBUG
+
+#ifdef PMCTRACE_PT_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+static struct pmcstat_symbol *
+symbol_lookup(struct mtrace_data *mdata)
+{
+ struct pmcstat_image *image;
+ struct pmcstat_symbol *sym;
+ struct pmcstat_pcmap *map;
+ uint64_t newpc;
+ uint64_t ip;
+
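+	/* Sign-extend the 48-bit IP to its canonical 64-bit form. */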
+ if (mdata->ip & (1UL << 47))
+ ip = mdata->ip | 0xffffUL << 48;
+ else
+ ip = mdata->ip;
+
+ map = pmcstat_process_find_map(mdata->pp, ip);
+ if (map != NULL) {
+ image = map->ppm_image;
+ newpc = ip - (map->ppm_lowpc +
+ (image->pi_vaddr - image->pi_start));
+ sym = pmcstat_symbol_search(image, newpc);
+ return (sym);
+ } else
+ dprintf("cpu%d: 0x%lx map not found\n", mdata->cpu, ip);
+
+ return (NULL);
+}
+
+static int
+print_tnt_payload(struct mtrace_data *mdata, uint64_t offset __unused,
+ const struct pt_packet_tnt *packet)
+{
+ char payload[48];
+ uint64_t tnt;
+ uint8_t bits;
+ char *begin;
+ char *end;
+
+ bits = packet->bit_size;
+ tnt = packet->payload;
+ begin = &payload[0];
+ end = begin + bits;
+
+	if (sizeof(payload) - 1 < bits)
+		end = begin + sizeof(payload) - 1;
+
+	for (; begin < end; ++begin, --bits)
+		*begin = tnt & (1ull << (bits - 1)) ? '!' : '.';
+	*begin = '\0';
+
+ printf("cpu%d: TNT %s\n", mdata->cpu, payload);
+
+ return (0);
+}
+
+static int
+print_ip_payload(struct mtrace_data *mdata, uint64_t offset __unused,
+ const struct pt_packet_ip *packet)
+{
+ struct pmcstat_symbol *sym;
+
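+	/*
+	 * Reconstruct the IP from its compressed encoding: the update
+	 * forms overwrite only the low 16/32/48 bits of the last IP.
+	 */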
+ switch (packet->ipc) {
+ case pt_ipc_suppressed:
+ break;
+ case pt_ipc_update_16:
+ mdata->ip &= ~0xffffUL;
+ mdata->ip |= (packet->ip & 0xffffUL);
+ break;
+ case pt_ipc_update_32:
+ mdata->ip &= ~0xffffffffUL;
+ mdata->ip |= (packet->ip & 0xffffffffUL);
+ break;
+ case pt_ipc_update_48:
+ mdata->ip &= ~0xffffffffffffUL;
+ mdata->ip |= (packet->ip & 0xffffffffffffUL);
+ break;
+ case pt_ipc_sext_48:
+ mdata->ip &= ~0xffffffffffffUL;
+ mdata->ip |= (packet->ip & 0xffffffffffffUL);
+		symbol_lookup(mdata);
+		break;
+ case pt_ipc_full:
+ mdata->ip = packet->ip;
+ break;
+ default:
+ printf("unknown ipc: %d\n", packet->ipc);
+ return (0);
+ }
+
+ sym = symbol_lookup(mdata);
+ if (sym) {
+ printf("cpu%d: IP 0x%lx %s\n", mdata->cpu, mdata->ip,
+ pmcstat_string_unintern(sym->ps_name));
+ } else
+ dprintf("cpu%d: 0x%lx not found\n", mdata->cpu, mdata->ip);
+
+ return (0);
+}
+
+static int
+dump_packets(struct mtrace_data *mdata, struct pt_packet_decoder *decoder,
+ const struct pt_config *config __unused)
+{
+ struct pt_packet packet;
+ uint64_t offset;
+ int error;
+
+ dprintf("%s\n", __func__);
+
+ while (1) {
+ error = pt_pkt_get_offset(decoder, &offset);
+ if (error < 0)
+			errx(EX_SOFTWARE, "ERROR: can't get offset, err %d", error);
+
+ error = pt_pkt_next(decoder, &packet, sizeof(packet));
+ if (error < 0) {
+ dprintf("%s: error %d\n", __func__, error);
+ break;
+ }
+
+ switch (packet.type) {
+ case ppt_invalid:
+ case ppt_unknown:
+ case ppt_pad:
+ case ppt_psb:
+ case ppt_psbend:
+ break;
+ case ppt_fup:
+ case ppt_tip:
+ case ppt_tip_pge:
+ case ppt_tip_pgd:
+ print_ip_payload(mdata, offset, &packet.payload.ip);
+ break;
+ case ppt_tnt_8:
+ case ppt_tnt_64:
+ if (mdata->flags & FLAG_BRANCH_TNT)
+ print_tnt_payload(mdata, offset, &packet.payload.tnt);
+ break;
+ case ppt_mode:
+ case ppt_pip:
+ case ppt_vmcs:
+ case ppt_cbr:
+ break;
+ case ppt_tsc:
+		printf("cpu%d: TSC %lu\n", mdata->cpu, packet.payload.tsc.tsc);
+ break;
+ case ppt_tma:
+ break;
+ case ppt_mtc:
+ printf("cpu%d: MTC %x\n", mdata->cpu, packet.payload.mtc.ctc);
+ break;
+ case ppt_cyc:
+ case ppt_stop:
+ case ppt_ovf:
+ case ppt_mnt:
+ case ppt_exstop:
+ case ppt_mwait:
+ case ppt_pwre:
+ case ppt_pwrx:
+ case ppt_ptw:
+ default:
+ break;
+ }
+ }
+
+ return (0);
+}
+
+static int
+ipt_process_chunk(struct mtrace_data *mdata, uint64_t base,
+ uint64_t start, uint64_t end)
+{
+ struct pt_packet_decoder *decoder;
+ struct pt_config config;
+ int error;
+
+ dprintf("%s\n", __func__);
+
+ memset(&config, 0, sizeof(config));
+ pt_config_init(&config);
+
+ error = pt_cpu_read(&config.cpu);
+ if (error < 0)
+		errx(EX_SOFTWARE, "ERROR: pt_cpu_read failed, err %d", error);
+	error = pt_cpu_errata(&config.errata, &config.cpu);
+	if (error < 0)
+		errx(EX_SOFTWARE, "ERROR: can't get errata, err %d", error);
+
+ config.begin = (uint8_t *)(base + start);
+ config.end = (uint8_t *)(base + end);
+
+ dprintf("%s: begin %lx end %lx\n", __func__,
+ (uint64_t)config.begin, (uint64_t)config.end);
+
+ decoder = pt_pkt_alloc_decoder(&config);
+ if (decoder == NULL) {
+ printf("Can't allocate decoder\n");
+ return (-1);
+ }
+
+ error = pt_pkt_sync_set(decoder, 0ull);
+ if (error < 0)
+		errx(EX_SOFTWARE, "ERROR: sync_set failed, err %d", error);
+	error = pt_pkt_sync_forward(decoder);
+	if (error < 0 && error != -pte_eos)
+		errx(EX_SOFTWARE, "ERROR: sync_forward failed, err %d", error);
+
+ while (1) {
+ error = dump_packets(mdata, decoder, &config);
+ if (error == 0)
+ break;
+
+ error = pt_pkt_sync_forward(decoder);
+ if (error < 0) {
+ if (error == -pte_eos)
+				break;
+		}
+	}
+
+	pt_pkt_free_decoder(decoder);
+
+ return (0);
+}
+
+int
+ipt_process(struct trace_cpu *tc, struct pmcstat_process *pp,
+ uint32_t cpu, uint32_t cycle, uint64_t offset,
+ uint32_t flags)
+{
+ struct mtrace_data *mdata;
+
+ mdata = &tc->mdata;
+ mdata->pp = pp;
+ mdata->flags = flags;
+
+ dprintf("%s: cpu %d, cycle %d, offset %ld\n",
+ __func__, cpu, cycle, offset);
+
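+	/* Nothing new has been written to the trace buffer. */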
+ if (offset == tc->offset)
+ return (0);
+
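+	/*
+	 * 'cycle' counts ring-buffer wraps.  Decode only the bytes added
+	 * since the previous call, handling at most one wrap per call.
+	 */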
+ if (cycle == tc->cycle) {
+ if (offset > tc->offset) {
+ ipt_process_chunk(mdata, (uint64_t)tc->base, tc->offset, offset);
+ tc->offset = offset;
+ } else if (offset < tc->offset) {
+			errx(EXIT_FAILURE, "cpu%d: offset already processed %lx %lx",
+ cpu, offset, tc->offset);
+ }
+ } else if (cycle > tc->cycle) {
+ if ((cycle - tc->cycle) > 1)
+			errx(EXIT_FAILURE, "cpu%d: trace buffer fills up faster than"
+			    " it can be processed (%d/%d); consider setting trace filters",
+ cpu, cycle, tc->cycle);
+ ipt_process_chunk(mdata, (uint64_t)tc->base, tc->offset, tc->bufsize);
+ tc->offset = 0;
+ tc->cycle += 1;
+ ipt_process_chunk(mdata, (uint64_t)tc->base, tc->offset, offset);
+ tc->offset = offset;
+ }
+
+ return (0);
+}