Index: sys/dev/hwpmc/hwpmc_mod.c =================================================================== --- sys/dev/hwpmc/hwpmc_mod.c +++ sys/dev/hwpmc/hwpmc_mod.c @@ -78,6 +78,7 @@ #include #include "hwpmc_soft.h" +#include "hwpmc_vm.h" #define PMC_EPOCH_ENTER() struct epoch_tracker pmc_et; epoch_enter_preempt(global_epoch_preempt, &pmc_et) #define PMC_EPOCH_EXIT() epoch_exit_preempt(global_epoch_preempt, &pmc_et) @@ -1467,6 +1468,8 @@ ("[pmc,%d] pmcval outside of expected range cpu=%d " "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__, cpu, ri, newvalue, pm->pm_sc.pm_reloadcount)); + } else if (PMC_TO_MODE(pm) == PMC_MODE_TT) { + /* Trace mode: nothing to reload. TODO: is newvalue set below? */ } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, @@ -1482,7 +1485,8 @@ pcd->pcd_write_pmc(cpu, adjri, newvalue); /* If a sampling mode PMC, reset stalled state. */ - if (PMC_TO_MODE(pm) == PMC_MODE_TS) + if (PMC_TO_MODE(pm) == PMC_MODE_TS || + PMC_TO_MODE(pm) == PMC_MODE_TT) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. */ @@ -1663,6 +1667,8 @@ pm->pm_sc.pm_reloadcount; } mtx_pool_unlock_spin(pmc_mtxpool, pm); + } else if (mode == PMC_MODE_TT) { + /* Trace mode: no count delta to accumulate. */ } else { tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); @@ -1756,6 +1762,8 @@ const struct pmc *pm; struct pmc_owner *po; const struct pmc_process *pp; + struct proc *p; + bool pause_thread; freepath = fullpath = NULL; MPASS(!in_epoch(global_epoch_preempt)); @@ -1772,14 +1780,40 @@ if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; + p = td->td_proc; + if ((p->p_flag & P_HWPMC) == 0) + goto done; + + pause_thread = false; + /* * Inform sampling PMC owners tracking this process. 
*/ - for (ri = 0; ri < md->pmd_npmc; ri++) - if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && - PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + for (ri = 0; ri < md->pmd_npmc; ri++) { + if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) + continue; + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || + PMC_TO_MODE(pm) == PMC_MODE_TT) pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); + if (PMC_TO_MODE(pm) == PMC_MODE_TT) + pause_thread = true; + } + + /* + * The pmclog entry with the mmap information has only been scheduled + * for delivery to userspace; the pmctrace application has not + * received it yet. Suspend this thread before we continue. Once the + * user process receives the log entry, it can reconfigure the tracing + * filters, start tracing and finally unsuspend this thread. + */ + if (pause_thread) { + PROC_LOCK(td->td_proc); + PROC_SLOCK(td->td_proc); + thread_suspend_switch(td, td->td_proc); + PROC_SUNLOCK(td->td_proc); + PROC_UNLOCK(td->td_proc); + } done: if (freepath) @@ -1813,11 +1847,14 @@ if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; - for (ri = 0; ri < md->pmd_npmc; ri++) - if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && - PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + for (ri = 0; ri < md->pmd_npmc; ri++) { + if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) + continue; + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || + PMC_TO_MODE(pm) == PMC_MODE_TT) pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); + } } /* @@ -1831,7 +1868,8 @@ struct pmckern_map_in *km, *kmbase; MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx)); - KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), + KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || + PMC_TO_MODE(pm) == PMC_MODE_ST, ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); @@ -2594,8 +2632,8 @@ mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) - if (pp->pp_proc == p) - break; + if (pp->pp_proc == p) + break; if ((mode & 
PMC_FLAG_REMOVE) && pp != NULL) LIST_REMOVE(pp, pp_next); @@ -3160,7 +3198,8 @@ * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ - if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || + PMC_TO_MODE(pm) == PMC_MODE_ST) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { @@ -3834,9 +3873,14 @@ mode = pa.pm_mode; cpu = pa.pm_cpu; - if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && - mode != PMC_MODE_TS && mode != PMC_MODE_TC) || - (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { + if (mode != PMC_MODE_SS && mode != PMC_MODE_TS && + mode != PMC_MODE_SC && mode != PMC_MODE_TC && + mode != PMC_MODE_ST && mode != PMC_MODE_TT) { + error = EINVAL; + break; + } + + if (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max()) { error = EINVAL; break; } @@ -4311,6 +4355,195 @@ } break; + case PMC_OP_LOG_KERNEL_MAP: + { + struct pmc_op_simple sp; + struct pmc *pm; + + if ((error = copyin(arg, &sp, sizeof(sp))) != 0) + break; + + /* locate pmc descriptor */ + if ((error = pmc_find_pmc(sp.pm_pmcid, &pm)) != 0) + break; + + if (PMC_TO_MODE(pm) != PMC_MODE_ST) + break; + + if (pm->pm_state != PMC_STATE_ALLOCATED && + pm->pm_state != PMC_STATE_STOPPED && + pm->pm_state != PMC_STATE_RUNNING) { + error = EINVAL; + break; + } + + pmc_log_kernel_mappings(pm); + } + break; + + case PMC_OP_THREAD_UNSUSPEND: + { + struct pmc_op_proc_unsuspend u; + struct proc *p; + struct pmc *pm; + + if ((error = copyin(arg, &u, sizeof(u))) != 0) + break; + + /* locate pmc descriptor */ + if ((error = pmc_find_pmc(u.pm_pmcid, &pm)) != 0) + break; + + /* lookup pid */ + if ((p = pfind(u.pm_pid)) == NULL) { + error = ESRCH; + break; + } + + /* pfind() returns the process locked: unlock on every path. */ + if ((p->p_flag & P_HWPMC) != 0) { + PROC_SLOCK(p); + thread_unsuspend(p); + PROC_SUNLOCK(p); + } + PROC_UNLOCK(p); + } + break; + + case PMC_OP_TRACE_CONFIG: + { + struct pmc_op_trace_config trc; + uint64_t *ranges; + struct pmc *pm; + struct pmc_binding 
pb; + struct pmc_classdep *pcd; + uint32_t nranges; + uint32_t cpu; + uint32_t ri; + int adjri; + + if ((error = copyin(arg, &trc, sizeof(trc))) != 0) + break; + + /* locate pmc descriptor */ + if ((error = pmc_find_pmc(trc.pm_pmcid, &pm)) != 0) + break; + + if (PMC_TO_MODE(pm) != PMC_MODE_ST && + PMC_TO_MODE(pm) != PMC_MODE_TT) + break; + + /* Can't proceed with PMC that hasn't been started. */ + if (pm->pm_state != PMC_STATE_ALLOCATED && + pm->pm_state != PMC_STATE_STOPPED && + pm->pm_state != PMC_STATE_RUNNING) { + error = EINVAL; + break; + } + + cpu = trc.pm_cpu; + nranges = trc.nranges; + + /* Both come from userland: validate before use. */ + if (cpu >= pmc_cpu_max() || + nranges > PMC_FILTER_MAX_IP_RANGES) { + error = EINVAL; + break; + } + + ri = PMC_TO_ROWINDEX(pm); + pcd = pmc_ri_to_classdep(md, ri, &adjri); + if (pcd->pcd_trace_config == NULL) + break; + + /* switch to CPU 'cpu' */ + pmc_save_cpu_binding(&pb); + pmc_select_cpu(cpu); + + ranges = trc.ranges; + + mtx_pool_lock_spin(pmc_mtxpool, pm); + error = (*pcd->pcd_trace_config)(cpu, adjri, + pm, ranges, nranges); + mtx_pool_unlock_spin(pmc_mtxpool, pm); + + pmc_restore_cpu_binding(&pb); + } + break; + + /* + * Read a PMC trace buffer ptr. + */ + case PMC_OP_TRACE_READ: + { + struct pmc_op_trace_read trr; + struct pmc_op_trace_read *trr_ret; + struct pmc_binding pb; + struct pmc_classdep *pcd; + struct pmc *pm; + pmc_value_t cycle; + pmc_value_t offset; + uint32_t cpu; + uint32_t ri; + int adjri; + + if ((error = copyin(arg, &trr, sizeof(trr))) != 0) + break; + + /* locate pmc descriptor */ + if ((error = pmc_find_pmc(trr.pm_pmcid, &pm)) != 0) + break; + + if (PMC_TO_MODE(pm) != PMC_MODE_ST && + PMC_TO_MODE(pm) != PMC_MODE_TT) + break; + + /* Can't read a PMC that hasn't been started. 
*/ + if (pm->pm_state != PMC_STATE_ALLOCATED && + pm->pm_state != PMC_STATE_STOPPED && + pm->pm_state != PMC_STATE_RUNNING) { + error = EINVAL; + break; + } + + /* cpu comes from userland: validate before binding to it. */ + cpu = trr.pm_cpu; + if (cpu >= pmc_cpu_max()) { + error = EINVAL; + break; + } + + ri = PMC_TO_ROWINDEX(pm); + pcd = pmc_ri_to_classdep(md, ri, &adjri); + if (pcd->pcd_read_trace == NULL) { + error = EOPNOTSUPP; + break; + } + + /* switch to CPU 'cpu' */ + pmc_save_cpu_binding(&pb); + pmc_select_cpu(cpu); + + mtx_pool_lock_spin(pmc_mtxpool, pm); + error = (*pcd->pcd_read_trace)(cpu, adjri, + pm, &cycle, &offset); + mtx_pool_unlock_spin(pmc_mtxpool, pm); + + pmc_restore_cpu_binding(&pb); + + /* On failure cycle/offset are not valid: don't copy out. */ + if (error != 0) + break; + + trr_ret = (struct pmc_op_trace_read *)arg; + if ((error = copyout(&cycle, &trr_ret->pm_cycle, + sizeof(trr.pm_cycle)))) + break; + if ((error = copyout(&offset, &trr_ret->pm_offset, + sizeof(trr.pm_offset)))) + break; + } + break; /* * Read and/or write a PMC. @@ -4414,7 +4647,7 @@ /* save old value */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, - &oldvalue))) + &oldvalue))) goto error; /* write out new value */ if (prw.pm_flags & PMC_F_NEWVALUE) @@ -5584,6 +5817,12 @@ return (ENOSYS); } + error = pmc_vm_initialize(md); + if (error) { + pmc_md_finalize(md); + return (error); + } + KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); @@ -5918,6 +6157,8 @@ } pmclog_shutdown(); + pmc_vm_finalize(); + counter_u64_free(pmc_stats.pm_intr_ignored); counter_u64_free(pmc_stats.pm_intr_processed); counter_u64_free(pmc_stats.pm_intr_bufferfull); Index: sys/sys/pmc.h =================================================================== --- sys/sys/pmc.h +++ sys/sys/pmc.h @@ -110,6 +110,7 @@ __PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \ __PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \ __PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \ + __PMC_CPU(INTEL_KABYLAKE, 0x9A, "Intel Kabylake") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ @@ -160,7 +161,9 @@ __PMC_CLASS(ARMV7, 
0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ - __PMC_CLASS(E500, 0x13, "Freescale e500 class") + __PMC_CLASS(E500, 0x13, "Freescale e500 class") \ + __PMC_CLASS(PT, 0x14, "Intel PT") \ + __PMC_CLASS(CORESIGHT, 0x15, "ARM Coresight") enum pmc_class { #undef __PMC_CLASS @@ -169,7 +172,7 @@ }; #define PMC_CLASS_FIRST PMC_CLASS_TSC -#define PMC_CLASS_LAST PMC_CLASS_E500 +#define PMC_CLASS_LAST PMC_CLASS_CORESIGHT /* * A PMC can be in the following states: @@ -240,7 +243,9 @@ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ - __PMC_MODE(TC, 3) + __PMC_MODE(TC, 3) \ + __PMC_MODE(ST, 4) \ + __PMC_MODE(TT, 5) enum pmc_mode { #undef __PMC_MODE @@ -254,11 +259,11 @@ #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ - ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) + ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC || (mode) == PMC_MODE_ST) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ - ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) + ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC || (mode) == PMC_MODE_TT) /* * PMC row disposition @@ -350,7 +355,11 @@ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ - __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") + __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") \ + __PMC_OP(LOG_KERNEL_MAP, "Log kernel mappings") \ + __PMC_OP(THREAD_UNSUSPEND, "Thread unsuspend") \ + __PMC_OP(TRACE_READ, "Read trace buffer pointer") \ + __PMC_OP(TRACE_CONFIG, "Setup trace IP ranges") enum pmc_ops { @@ -498,7 +507,6 @@ pmc_value_t pm_value; /* new&returned value */ }; - /* * OP GETPMCINFO * @@ -524,6 +532,40 @@ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; +/* + * OP THREAD_UNSUSPEND + * + * Unsuspend all threads of a process. 
+ */ + +struct pmc_op_proc_unsuspend { + pmc_id_t pm_pmcid; /* PMC id */ + pid_t pm_pid; /* process to unsuspend */ +}; + +/* + * OP TRACE_CONFIG -- set up trace IP ranges + */ + +#define PMC_FILTER_MAX_IP_RANGES 4 + +struct pmc_op_trace_config { + pmc_id_t pm_pmcid; /* PMC id */ + uint32_t pm_cpu; /* CPU number (TODO confirm: PMC_CPU_ANY valid?) */ + uint64_t ranges[2 * PMC_FILTER_MAX_IP_RANGES]; /* presumably start/end pairs */ + uint32_t nranges; /* ranges in use (TODO confirm units) */ +}; + +/* + * OP TRACE_READ -- read trace buffer pointer + */ + +struct pmc_op_trace_read { + pmc_id_t pm_pmcid; /* PMC id */ + uint32_t pm_cpu; /* CPU to query */ + pmc_value_t pm_cycle; /* returned value */ + pmc_value_t pm_offset; /* returned value */ +}; /* * OP GETCPUINFO @@ -531,7 +573,6 @@ * Retrieve system CPU information. */ - struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ @@ -1017,6 +1058,12 @@ int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); + /* trace: optional hooks, may be NULL */ + int (*pcd_read_trace)(int _cpu, int _ri, struct pmc *_pm, + pmc_value_t *_cycle, pmc_value_t *_offset); + int (*pcd_trace_config)(int _cpu, int _ri, struct pmc *_pm, + uint64_t *ranges, uint32_t nranges); + /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); @@ -1044,7 +1091,7 @@ * Machine dependent bits needed per CPU type. */ -struct pmc_mdep { +struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */