diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 3b3026cf086b..5286533bacdc 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -1,1873 +1,1875 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpmcinternal.h" /* Function prototypes */ #if defined(__amd64__) || defined(__i386__) static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__amd64__) || defined(__i386__) static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__arm__) #if defined(__XSCALE__) static int xscale_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif static int armv7_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__aarch64__) static int arm64_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__mips__) static int mips_allocate_pmc(enum pmc_event _pe, char* ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif /* __mips__ */ static int soft_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #if defined(__powerpc__) static int powerpc_allocate_pmc(enum pmc_event _pe, char* ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif /* __powerpc__ */ #define PMC_CALL(cmd, params) \ syscall(pmc_syscall, PMC_OP_##cmd, (params)) /* * Event aliases provide a way for the user to ask for generic events * like "cache-misses", or "instructions-retired". These aliases are * mapped to the appropriate canonical event descriptions using a * lookup table. */ struct pmc_event_alias { const char *pm_alias; const char *pm_spec; }; static const struct pmc_event_alias *pmc_mdep_event_aliases; /* * The pmc_event_descr structure maps symbolic names known to the user * to integer codes used by the PMC KLD. */ struct pmc_event_descr { const char *pm_ev_name; enum pmc_event pm_ev_code; }; /* * The pmc_class_descr structure maps class name prefixes for * event names to event tables and other PMC class data. */ struct pmc_class_descr { const char *pm_evc_name; size_t pm_evc_name_size; enum pmc_class pm_evc_class; const struct pmc_event_descr *pm_evc_event_table; size_t pm_evc_event_table_size; int (*pm_evc_allocate_pmc)(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pa); }; #define PMC_TABLE_SIZE(N) (sizeof(N)/sizeof(N[0])) #define PMC_EVENT_TABLE_SIZE(N) PMC_TABLE_SIZE(N##_event_table) #undef __PMC_EV #define __PMC_EV(C,N) { #N, PMC_EV_ ## C ## _ ## N }, /* * PMC_CLASSDEP_TABLE(NAME, CLASS) * * Define a table mapping event names and aliases to HWPMC event IDs. */ #define PMC_CLASSDEP_TABLE(N, C) \ static const struct pmc_event_descr N##_event_table[] = \ { \ __PMC_EV_##C() \ } PMC_CLASSDEP_TABLE(iaf, IAF); PMC_CLASSDEP_TABLE(k8, K8); PMC_CLASSDEP_TABLE(xscale, XSCALE); PMC_CLASSDEP_TABLE(armv7, ARMV7); PMC_CLASSDEP_TABLE(armv8, ARMV8); PMC_CLASSDEP_TABLE(mips24k, MIPS24K); PMC_CLASSDEP_TABLE(mips74k, MIPS74K); PMC_CLASSDEP_TABLE(octeon, OCTEON); PMC_CLASSDEP_TABLE(ppc7450, PPC7450); PMC_CLASSDEP_TABLE(ppc970, PPC970); PMC_CLASSDEP_TABLE(e500, E500); static struct pmc_event_descr soft_event_table[PMC_EV_DYN_COUNT]; #undef __PMC_EV_ALIAS #define __PMC_EV_ALIAS(N,CODE) { N, PMC_EV_##CODE }, static const struct pmc_event_descr cortex_a8_event_table[] = { __PMC_EV_ALIAS_ARMV7_CORTEX_A8() }; static const struct pmc_event_descr cortex_a9_event_table[] = { __PMC_EV_ALIAS_ARMV7_CORTEX_A9() }; static const struct pmc_event_descr cortex_a53_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A53() }; static const struct pmc_event_descr cortex_a57_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A57() }; /* * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...) * * Map a CPU to the PMC classes it supports. */ #define PMC_MDEP_TABLE(N,C,...) \ static const enum pmc_class N##_pmc_classes[] = { \ PMC_CLASS_##C, __VA_ARGS__ \ } PMC_MDEP_TABLE(k8, K8, PMC_CLASS_SOFT, PMC_CLASS_TSC); PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_SOFT, PMC_CLASS_XSCALE); PMC_MDEP_TABLE(cortex_a8, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(cortex_a9, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(cortex_a53, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); PMC_MDEP_TABLE(cortex_a57, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_SOFT, PMC_CLASS_MIPS24K); PMC_MDEP_TABLE(mips74k, MIPS74K, PMC_CLASS_SOFT, PMC_CLASS_MIPS74K); PMC_MDEP_TABLE(octeon, OCTEON, PMC_CLASS_SOFT, PMC_CLASS_OCTEON); PMC_MDEP_TABLE(ppc7450, PPC7450, PMC_CLASS_SOFT, PMC_CLASS_PPC7450, PMC_CLASS_TSC); PMC_MDEP_TABLE(ppc970, PPC970, PMC_CLASS_SOFT, PMC_CLASS_PPC970, PMC_CLASS_TSC); PMC_MDEP_TABLE(e500, E500, PMC_CLASS_SOFT, PMC_CLASS_E500, PMC_CLASS_TSC); PMC_MDEP_TABLE(generic, SOFT, PMC_CLASS_SOFT); static const struct pmc_event_descr tsc_event_table[] = { __PMC_EV_TSC() }; #undef PMC_CLASS_TABLE_DESC #define PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR) \ static const struct pmc_class_descr NAME##_class_table_descr = \ { \ .pm_evc_name = #CLASS "-", \ .pm_evc_name_size = sizeof(#CLASS "-") - 1, \ .pm_evc_class = PMC_CLASS_##CLASS , \ .pm_evc_event_table = EVENTS##_event_table , \ .pm_evc_event_table_size = \ PMC_EVENT_TABLE_SIZE(EVENTS), \ .pm_evc_allocate_pmc = ALLOCATOR##_allocate_pmc \ } #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(k8, K8, k8, k8); #endif #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc); #endif #if defined(__arm__) #if defined(__XSCALE__) PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale); #endif PMC_CLASS_TABLE_DESC(cortex_a8, ARMV7, cortex_a8, armv7); PMC_CLASS_TABLE_DESC(cortex_a9, ARMV7, cortex_a9, armv7); #endif #if defined(__aarch64__) PMC_CLASS_TABLE_DESC(cortex_a53, ARMV8, cortex_a53, arm64); PMC_CLASS_TABLE_DESC(cortex_a57, ARMV8, cortex_a57, arm64); #endif #if defined(__mips__) PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips); PMC_CLASS_TABLE_DESC(mips74k, MIPS74K, mips74k, mips); PMC_CLASS_TABLE_DESC(octeon, OCTEON, octeon, mips); #endif /* __mips__ */ #if defined(__powerpc__) PMC_CLASS_TABLE_DESC(ppc7450, PPC7450, ppc7450, powerpc); PMC_CLASS_TABLE_DESC(ppc970, PPC970, ppc970, powerpc); PMC_CLASS_TABLE_DESC(e500, E500, e500, powerpc); #endif static struct pmc_class_descr soft_class_table_descr = { .pm_evc_name = "SOFT-", .pm_evc_name_size = sizeof("SOFT-") - 1, .pm_evc_class = PMC_CLASS_SOFT, .pm_evc_event_table = NULL, .pm_evc_event_table_size = 0, .pm_evc_allocate_pmc = soft_allocate_pmc }; #undef PMC_CLASS_TABLE_DESC static const struct pmc_class_descr **pmc_class_table; #define PMC_CLASS_TABLE_SIZE cpu_info.pm_nclass static const enum pmc_class *pmc_mdep_class_list; static size_t pmc_mdep_class_list_size; /* * Mapping tables, mapping enumeration values to human readable * strings. */ static const char * pmc_capability_names[] = { #undef __PMC_CAP #define __PMC_CAP(N,V,D) #N , __PMC_CAPS() }; struct pmc_class_map { enum pmc_class pm_class; const char *pm_name; }; static const struct pmc_class_map pmc_class_names[] = { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) { .pm_class = PMC_CLASS_##S, .pm_name = #S } , __PMC_CLASSES() }; struct pmc_cputype_map { enum pmc_cputype pm_cputype; const char *pm_name; }; static const struct pmc_cputype_map pmc_cputype_names[] = { #undef __PMC_CPU #define __PMC_CPU(S, V, D) { .pm_cputype = PMC_CPU_##S, .pm_name = #S } , __PMC_CPUS() }; static const char * pmc_disposition_names[] = { #undef __PMC_DISP #define __PMC_DISP(D) #D , __PMC_DISPOSITIONS() }; static const char * pmc_mode_names[] = { #undef __PMC_MODE #define __PMC_MODE(M,N) #M , __PMC_MODES() }; static const char * pmc_state_names[] = { #undef __PMC_STATE #define __PMC_STATE(S) #S , __PMC_STATES() }; /* * Filled in by pmc_init(). */ static int pmc_syscall = -1; static struct pmc_cpuinfo cpu_info; static struct pmc_op_getdyneventinfo soft_event_info; /* Event masks for events */ struct pmc_masks { const char *pm_name; const uint64_t pm_value; }; #define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) } #define NULLMASK { .pm_name = NULL } #if defined(__amd64__) || defined(__i386__) static int pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint64_t *evmask) { const struct pmc_masks *pm; char *q, *r; int c; if (pmask == NULL) /* no mask keywords */ return (-1); q = strchr(p, '='); /* skip '=' */ if (*++q == '\0') /* no more data */ return (-1); c = 0; /* count of mask keywords seen */ while ((r = strsep(&q, "+")) != NULL) { for (pm = pmask; pm->pm_name && strcasecmp(r, pm->pm_name); pm++) ; if (pm->pm_name == NULL) /* not found */ return (-1); *evmask |= pm->pm_value; c++; } return (c); } #endif #define KWMATCH(p,kw) (strcasecmp((p), (kw)) == 0) #define KWPREFIXMATCH(p,kw) (strncasecmp((p), (kw), sizeof((kw)) - 1) == 0) #define EV_ALIAS(N,S) { .pm_alias = N, .pm_spec = S } #if defined(__amd64__) || defined(__i386__) /* * AMD K8 PMCs. * */ static struct pmc_event_alias k8_aliases[] = { EV_ALIAS("branches", "k8-fr-retired-taken-branches"), EV_ALIAS("branch-mispredicts", "k8-fr-retired-taken-branches-mispredicted"), EV_ALIAS("cycles", "tsc"), EV_ALIAS("dc-misses", "k8-dc-miss"), EV_ALIAS("ic-misses", "k8-ic-miss"), EV_ALIAS("instructions", "k8-fr-retired-x86-instructions"), EV_ALIAS("interrupts", "k8-fr-taken-hardware-interrupts"), EV_ALIAS("unhalted-cycles", "k8-bu-cpu-clk-unhalted"), EV_ALIAS(NULL, NULL) }; #define __K8MASK(N,V) PMCMASK(N,(1 << (V))) /* * Parsing tables */ /* fp dispatched fpu ops */ static const struct pmc_masks k8_mask_fdfo[] = { __K8MASK(add-pipe-excluding-junk-ops, 0), __K8MASK(multiply-pipe-excluding-junk-ops, 1), __K8MASK(store-pipe-excluding-junk-ops, 2), __K8MASK(add-pipe-junk-ops, 3), __K8MASK(multiply-pipe-junk-ops, 4), __K8MASK(store-pipe-junk-ops, 5), NULLMASK }; /* ls segment register loads */ static const struct pmc_masks k8_mask_lsrl[] = { __K8MASK(es, 0), __K8MASK(cs, 1), __K8MASK(ss, 2), __K8MASK(ds, 3), __K8MASK(fs, 4), __K8MASK(gs, 5), __K8MASK(hs, 6), NULLMASK }; /* ls locked operation */ static const struct pmc_masks k8_mask_llo[] = { __K8MASK(locked-instructions, 0), __K8MASK(cycles-in-request, 1), __K8MASK(cycles-to-complete, 2), NULLMASK }; /* dc refill from {l2,system} and dc copyback */ static const struct pmc_masks k8_mask_dc[] = { __K8MASK(invalid, 0), __K8MASK(shared, 1), __K8MASK(exclusive, 2), __K8MASK(owner, 3), __K8MASK(modified, 4), NULLMASK }; /* dc one bit ecc error */ static const struct pmc_masks k8_mask_dobee[] = { __K8MASK(scrubber, 0), __K8MASK(piggyback, 1), NULLMASK }; /* dc dispatched prefetch instructions */ static const struct pmc_masks k8_mask_ddpi[] = { __K8MASK(load, 0), __K8MASK(store, 1), __K8MASK(nta, 2), NULLMASK }; /* dc dcache accesses by locks */ static const struct pmc_masks k8_mask_dabl[] = { __K8MASK(accesses, 0), __K8MASK(misses, 1), NULLMASK }; /* bu internal l2 request */ static const struct pmc_masks k8_mask_bilr[] = { __K8MASK(ic-fill, 0), __K8MASK(dc-fill, 1), __K8MASK(tlb-reload, 2), __K8MASK(tag-snoop, 3), __K8MASK(cancelled, 4), NULLMASK }; /* bu fill request l2 miss */ static const struct pmc_masks k8_mask_bfrlm[] = { __K8MASK(ic-fill, 0), __K8MASK(dc-fill, 1), __K8MASK(tlb-reload, 2), NULLMASK }; /* bu fill into l2 */ static const struct pmc_masks k8_mask_bfil[] = { __K8MASK(dirty-l2-victim, 0), __K8MASK(victim-from-l2, 1), NULLMASK }; /* fr retired fpu instructions */ static const struct pmc_masks k8_mask_frfi[] = { __K8MASK(x87, 0), __K8MASK(mmx-3dnow, 1), __K8MASK(packed-sse-sse2, 2), __K8MASK(scalar-sse-sse2, 3), NULLMASK }; /* fr retired fastpath double op instructions */ static const struct pmc_masks k8_mask_frfdoi[] = { __K8MASK(low-op-pos-0, 0), __K8MASK(low-op-pos-1, 1), __K8MASK(low-op-pos-2, 2), NULLMASK }; /* fr fpu exceptions */ static const struct pmc_masks k8_mask_ffe[] = { __K8MASK(x87-reclass-microfaults, 0), __K8MASK(sse-retype-microfaults, 1), __K8MASK(sse-reclass-microfaults, 2), __K8MASK(sse-and-x87-microtraps, 3), NULLMASK }; /* nb memory controller page access event */ static const struct pmc_masks k8_mask_nmcpae[] = { __K8MASK(page-hit, 0), __K8MASK(page-miss, 1), __K8MASK(page-conflict, 2), NULLMASK }; /* nb memory controller turnaround */ static const struct pmc_masks k8_mask_nmct[] = { __K8MASK(dimm-turnaround, 0), __K8MASK(read-to-write-turnaround, 1), __K8MASK(write-to-read-turnaround, 2), NULLMASK }; /* nb memory controller bypass saturation */ static const struct pmc_masks k8_mask_nmcbs[] = { __K8MASK(memory-controller-hi-pri-bypass, 0), __K8MASK(memory-controller-lo-pri-bypass, 1), __K8MASK(dram-controller-interface-bypass, 2), __K8MASK(dram-controller-queue-bypass, 3), NULLMASK }; /* nb sized commands */ static const struct pmc_masks k8_mask_nsc[] = { __K8MASK(nonpostwrszbyte, 0), __K8MASK(nonpostwrszdword, 1), __K8MASK(postwrszbyte, 2), __K8MASK(postwrszdword, 3), __K8MASK(rdszbyte, 4), __K8MASK(rdszdword, 5), __K8MASK(rdmodwr, 6), NULLMASK }; /* nb probe result */ static const struct pmc_masks k8_mask_npr[] = { __K8MASK(probe-miss, 0), __K8MASK(probe-hit, 1), __K8MASK(probe-hit-dirty-no-memory-cancel, 2), __K8MASK(probe-hit-dirty-with-memory-cancel, 3), NULLMASK }; /* nb hypertransport bus bandwidth */ static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */ __K8MASK(command, 0), __K8MASK(data, 1), __K8MASK(buffer-release, 2), __K8MASK(nop, 3), NULLMASK }; #undef __K8MASK #define K8_KW_COUNT "count" #define K8_KW_EDGE "edge" #define K8_KW_INV "inv" #define K8_KW_MASK "mask" #define K8_KW_OS "os" #define K8_KW_USR "usr" static int k8_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; int n; uint32_t count; uint64_t evmask; const struct pmc_masks *pm, *pmask; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); pmc_config->pm_md.pm_amd.pm_amd_config = 0; pmask = NULL; evmask = 0; #define __K8SETMASK(M) pmask = k8_mask_##M /* setup parsing tables */ switch (pe) { case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: __K8SETMASK(fdfo); break; case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD: __K8SETMASK(lsrl); break; case PMC_EV_K8_LS_LOCKED_OPERATION: __K8SETMASK(llo); break; case PMC_EV_K8_DC_REFILL_FROM_L2: case PMC_EV_K8_DC_REFILL_FROM_SYSTEM: case PMC_EV_K8_DC_COPYBACK: __K8SETMASK(dc); break; case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR: __K8SETMASK(dobee); break; case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS: __K8SETMASK(ddpi); break; case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: __K8SETMASK(dabl); break; case PMC_EV_K8_BU_INTERNAL_L2_REQUEST: __K8SETMASK(bilr); break; case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS: __K8SETMASK(bfrlm); break; case PMC_EV_K8_BU_FILL_INTO_L2: __K8SETMASK(bfil); break; case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: __K8SETMASK(frfi); break; case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: __K8SETMASK(frfdoi); break; case PMC_EV_K8_FR_FPU_EXCEPTIONS: __K8SETMASK(ffe); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT: __K8SETMASK(nmcpae); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND: __K8SETMASK(nmct); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION: __K8SETMASK(nmcbs); break; case PMC_EV_K8_NB_SIZED_COMMANDS: __K8SETMASK(nsc); break; case PMC_EV_K8_NB_PROBE_RESULT: __K8SETMASK(npr); break; case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH: case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH: case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH: __K8SETMASK(nhbb); break; default: break; /* no options defined */ } while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); count = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_caps |= PMC_CAP_THRESHOLD; pmc_config->pm_md.pm_amd.pm_amd_config |= AMD_PMC_TO_COUNTER(count); } else if (KWMATCH(p, K8_KW_EDGE)) { pmc_config->pm_caps |= PMC_CAP_EDGE; } else if (KWMATCH(p, K8_KW_INV)) { pmc_config->pm_caps |= PMC_CAP_INVERT; } else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) { if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0) return (-1); pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } else if (KWMATCH(p, K8_KW_OS)) { pmc_config->pm_caps |= PMC_CAP_SYSTEM; } else if (KWMATCH(p, K8_KW_USR)) { pmc_config->pm_caps |= PMC_CAP_USER; } else return (-1); } /* other post processing */ switch (pe) { case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED: case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS: case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: case PMC_EV_K8_FR_FPU_EXCEPTIONS: /* XXX only available in rev B and later */ break; case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: /* XXX only available in rev C and later */ break; case PMC_EV_K8_LS_LOCKED_OPERATION: /* XXX CPU Rev A,B evmask is to be zero */ if (evmask & (evmask - 1)) /* > 1 bit set */ return (-1); if (evmask == 0) { evmask = 0x01; /* Rev C and later: #instrs */ pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } break; default: if (evmask == 0 && pmask != NULL) { for (pm = pmask; pm->pm_name; pm++) evmask |= pm->pm_value; pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } } if (pmc_config->pm_caps & PMC_CAP_QUALIFIER) pmc_config->pm_md.pm_amd.pm_amd_config = AMD_PMC_TO_UNITMASK(evmask); return (0); } #endif #if defined(__i386__) || defined(__amd64__) static int tsc_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { if (pe != PMC_EV_TSC_TSC) return (-1); /* TSC events must be unqualified. */ if (ctrspec && *ctrspec != '\0') return (-1); pmc_config->pm_md.pm_amd.pm_amd_config = 0; pmc_config->pm_caps |= PMC_CAP_READ; return (0); } #endif static struct pmc_event_alias generic_aliases[] = { EV_ALIAS("instructions", "SOFT-CLOCK.HARD"), EV_ALIAS(NULL, NULL) }; static int soft_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { (void)ctrspec; (void)pmc_config; if ((int)pe < PMC_EV_SOFT_FIRST || (int)pe > PMC_EV_SOFT_LAST) return (-1); pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); return (0); } #if defined(__arm__) #if defined(__XSCALE__) static struct pmc_event_alias xscale_aliases[] = { EV_ALIAS("branches", "BRANCH_RETIRED"), EV_ALIAS("branch-mispredicts", "BRANCH_MISPRED"), EV_ALIAS("dc-misses", "DC_MISS"), EV_ALIAS("ic-misses", "IC_MISS"), EV_ALIAS("instructions", "INSTR_RETIRED"), EV_ALIAS(NULL, NULL) }; static int xscale_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif static struct pmc_event_alias cortex_a8_aliases[] = { EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a9_aliases[] = { EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS(NULL, NULL) }; static int armv7_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif #if defined(__aarch64__) static struct pmc_event_alias cortex_a53_aliases[] = { EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a57_aliases[] = { EV_ALIAS(NULL, NULL) }; static int arm64_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif #if defined(__mips__) static struct pmc_event_alias mips24k_aliases[] = { EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS("branches", "BRANCH_COMPLETED"), EV_ALIAS("branch-mispredicts", "BRANCH_MISPRED"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias mips74k_aliases[] = { EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS("branches", "BRANCH_INSNS"), EV_ALIAS("branch-mispredicts", "MISPREDICTED_BRANCH_INSNS"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias octeon_aliases[] = { EV_ALIAS("instructions", "RET"), EV_ALIAS("branches", "BR"), EV_ALIAS("branch-mispredicts", "BRMIS"), EV_ALIAS(NULL, NULL) }; #define MIPS_KW_OS "os" #define MIPS_KW_USR "usr" #define MIPS_KW_ANYTHREAD "anythread" static int mips_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { char *p; (void) pe; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWMATCH(p, MIPS_KW_OS)) pmc_config->pm_caps |= PMC_CAP_SYSTEM; else if (KWMATCH(p, MIPS_KW_USR)) pmc_config->pm_caps |= PMC_CAP_USER; else if (KWMATCH(p, MIPS_KW_ANYTHREAD)) pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM); else return (-1); } return (0); } #endif /* __mips__ */ #if defined(__powerpc__) static struct pmc_event_alias ppc7450_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("branches", "BRANCHES_COMPLETED"), EV_ALIAS("branch-mispredicts", "MISPREDICTED_BRANCHES"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias ppc970_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("cycles", "CYCLES"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias e500_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("cycles", "CYCLES"), EV_ALIAS(NULL, NULL) }; #define POWERPC_KW_OS "os" #define POWERPC_KW_USR "usr" #define POWERPC_KW_ANYTHREAD "anythread" static int powerpc_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { char *p; (void) pe; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWMATCH(p, POWERPC_KW_OS)) pmc_config->pm_caps |= PMC_CAP_SYSTEM; else if (KWMATCH(p, POWERPC_KW_USR)) pmc_config->pm_caps |= PMC_CAP_USER; else if (KWMATCH(p, POWERPC_KW_ANYTHREAD)) pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM); else return (-1); } return (0); } #endif /* __powerpc__ */ /* * Match an event name `name' with its canonical form. * * Matches are case insensitive and spaces, periods, underscores and * hyphen characters are considered to match each other. * * Returns 1 for a match, 0 otherwise. */ static int pmc_match_event_name(const char *name, const char *canonicalname) { int cc, nc; const unsigned char *c, *n; c = (const unsigned char *) canonicalname; n = (const unsigned char *) name; for (; (nc = *n) && (cc = *c); n++, c++) { if ((nc == ' ' || nc == '_' || nc == '-' || nc == '.') && (cc == ' ' || cc == '_' || cc == '-' || cc == '.')) continue; if (toupper(nc) == toupper(cc)) continue; return (0); } if (*n == '\0' && *c == '\0') return (1); return (0); } /* * Match an event name against all the event named supported by a * PMC class. * * Returns an event descriptor pointer on match or NULL otherwise. */ static const struct pmc_event_descr * pmc_match_event_class(const char *name, const struct pmc_class_descr *pcd) { size_t n; const struct pmc_event_descr *ev; ev = pcd->pm_evc_event_table; for (n = 0; n < pcd->pm_evc_event_table_size; n++, ev++) if (pmc_match_event_name(name, ev->pm_ev_name)) return (ev); return (NULL); } static int pmc_mdep_is_compatible_class(enum pmc_class pc) { size_t n; for (n = 0; n < pmc_mdep_class_list_size; n++) if (pmc_mdep_class_list[n] == pc) return (1); return (0); } /* * API entry points */ int pmc_allocate(const char *ctrspec, enum pmc_mode mode, - uint32_t flags, int cpu, pmc_id_t *pmcid) + uint32_t flags, int cpu, pmc_id_t *pmcid, + uint64_t count) { size_t n; int retval; char *r, *spec_copy; const char *ctrname; const struct pmc_event_descr *ev; const struct pmc_event_alias *alias; struct pmc_op_pmcallocate pmc_config; const struct pmc_class_descr *pcd; spec_copy = NULL; retval = -1; if (mode != PMC_MODE_SS && mode != PMC_MODE_TS && mode != PMC_MODE_SC && mode != PMC_MODE_TC) { errno = EINVAL; goto out; } bzero(&pmc_config, sizeof(pmc_config)); pmc_config.pm_cpu = cpu; pmc_config.pm_mode = mode; pmc_config.pm_flags = flags; + pmc_config.pm_count = count; if (PMC_IS_SAMPLING_MODE(mode)) pmc_config.pm_caps |= PMC_CAP_INTERRUPT; /* * Can we pull this straight from the pmu table? */ r = spec_copy = strdup(ctrspec); ctrname = strsep(&r, ","); if (pmc_pmu_enabled()) { if (pmc_pmu_pmcallocate(ctrname, &pmc_config) == 0) { if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0) { goto out; } retval = 0; *pmcid = pmc_config.pm_pmcid; goto out; } errx(EX_USAGE, "ERROR: pmc_pmu_allocate failed, check for ctrname %s\n", ctrname); } else { free(spec_copy); spec_copy = NULL; } /* replace an event alias with the canonical event specifier */ if (pmc_mdep_event_aliases) for (alias = pmc_mdep_event_aliases; alias->pm_alias; alias++) if (!strcasecmp(ctrspec, alias->pm_alias)) { spec_copy = strdup(alias->pm_spec); break; } if (spec_copy == NULL) spec_copy = strdup(ctrspec); r = spec_copy; ctrname = strsep(&r, ","); /* * If a explicit class prefix was given by the user, restrict the * search for the event to the specified PMC class. */ ev = NULL; for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) { pcd = pmc_class_table[n]; if (pcd && pmc_mdep_is_compatible_class(pcd->pm_evc_class) && strncasecmp(ctrname, pcd->pm_evc_name, pcd->pm_evc_name_size) == 0) { if ((ev = pmc_match_event_class(ctrname + pcd->pm_evc_name_size, pcd)) == NULL) { errno = EINVAL; goto out; } break; } } /* * Otherwise, search for this event in all compatible PMC * classes. */ for (n = 0; ev == NULL && n < PMC_CLASS_TABLE_SIZE; n++) { pcd = pmc_class_table[n]; if (pcd && pmc_mdep_is_compatible_class(pcd->pm_evc_class)) ev = pmc_match_event_class(ctrname, pcd); } if (ev == NULL) { errno = EINVAL; goto out; } pmc_config.pm_ev = ev->pm_ev_code; pmc_config.pm_class = pcd->pm_evc_class; if (pcd->pm_evc_allocate_pmc(ev->pm_ev_code, r, &pmc_config) < 0) { errno = EINVAL; goto out; } if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0) goto out; *pmcid = pmc_config.pm_pmcid; retval = 0; out: if (spec_copy) free(spec_copy); return (retval); } int pmc_attach(pmc_id_t pmc, pid_t pid) { struct pmc_op_pmcattach pmc_attach_args; pmc_attach_args.pm_pmc = pmc; pmc_attach_args.pm_pid = pid; return (PMC_CALL(PMCATTACH, &pmc_attach_args)); } int pmc_capabilities(pmc_id_t pmcid, uint32_t *caps) { unsigned int i; enum pmc_class cl; cl = PMC_ID_TO_CLASS(pmcid); for (i = 0; i < cpu_info.pm_nclass; i++) if (cpu_info.pm_classes[i].pm_class == cl) { *caps = cpu_info.pm_classes[i].pm_caps; return (0); } errno = EINVAL; return (-1); } int pmc_configure_logfile(int fd) { struct pmc_op_configurelog cla; cla.pm_logfd = fd; if (PMC_CALL(CONFIGURELOG, &cla) < 0) return (-1); return (0); } int pmc_cpuinfo(const struct pmc_cpuinfo **pci) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } *pci = &cpu_info; return (0); } int pmc_detach(pmc_id_t pmc, pid_t pid) { struct pmc_op_pmcattach pmc_detach_args; pmc_detach_args.pm_pmc = pmc; pmc_detach_args.pm_pid = pid; return (PMC_CALL(PMCDETACH, &pmc_detach_args)); } int pmc_disable(int cpu, int pmc) { struct pmc_op_pmcadmin ssa; ssa.pm_cpu = cpu; ssa.pm_pmc = pmc; ssa.pm_state = PMC_STATE_DISABLED; return (PMC_CALL(PMCADMIN, &ssa)); } int pmc_enable(int cpu, int pmc) { struct pmc_op_pmcadmin ssa; ssa.pm_cpu = cpu; ssa.pm_pmc = pmc; ssa.pm_state = PMC_STATE_FREE; return (PMC_CALL(PMCADMIN, &ssa)); } /* * Return a list of events known to a given PMC class. 'cl' is the * PMC class identifier, 'eventnames' is the returned list of 'const * char *' pointers pointing to the names of the events. 'nevents' is * the number of event name pointers returned. * * The space for 'eventnames' is allocated using malloc(3). The caller * is responsible for freeing this space when done. */ int pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, int *nevents) { int count; const char **names; const struct pmc_event_descr *ev; switch (cl) { case PMC_CLASS_IAF: ev = iaf_event_table; count = PMC_EVENT_TABLE_SIZE(iaf); break; case PMC_CLASS_TSC: ev = tsc_event_table; count = PMC_EVENT_TABLE_SIZE(tsc); break; case PMC_CLASS_K8: ev = k8_event_table; count = PMC_EVENT_TABLE_SIZE(k8); break; case PMC_CLASS_XSCALE: ev = xscale_event_table; count = PMC_EVENT_TABLE_SIZE(xscale); break; case PMC_CLASS_ARMV7: switch (cpu_info.pm_cputype) { default: case PMC_CPU_ARMV7_CORTEX_A8: ev = cortex_a8_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: ev = cortex_a9_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a9); break; } break; case PMC_CLASS_ARMV8: switch (cpu_info.pm_cputype) { default: case PMC_CPU_ARMV8_CORTEX_A53: ev = cortex_a53_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: ev = cortex_a57_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a57); break; } break; case PMC_CLASS_MIPS24K: ev = mips24k_event_table; count = PMC_EVENT_TABLE_SIZE(mips24k); break; case PMC_CLASS_MIPS74K: ev = mips74k_event_table; count = PMC_EVENT_TABLE_SIZE(mips74k); break; case PMC_CLASS_OCTEON: ev = octeon_event_table; count = PMC_EVENT_TABLE_SIZE(octeon); break; case PMC_CLASS_PPC7450: ev = ppc7450_event_table; count = PMC_EVENT_TABLE_SIZE(ppc7450); break; case PMC_CLASS_PPC970: ev = ppc970_event_table; count = PMC_EVENT_TABLE_SIZE(ppc970); break; case PMC_CLASS_E500: ev = e500_event_table; count = PMC_EVENT_TABLE_SIZE(e500); break; case PMC_CLASS_SOFT: ev = soft_event_table; count = soft_event_info.pm_nevent; break; default: errno = EINVAL; return (-1); } if ((names = malloc(count * sizeof(const char *))) == NULL) return (-1); *eventnames = names; *nevents = count; for (;count--; ev++, names++) *names = ev->pm_ev_name; return (0); } int pmc_flush_logfile(void) { return (PMC_CALL(FLUSHLOG,0)); } int pmc_close_logfile(void) { return (PMC_CALL(CLOSELOG,0)); } int pmc_get_driver_stats(struct pmc_driverstats *ds) { struct pmc_op_getdriverstats gms; if (PMC_CALL(GETDRIVERSTATS, &gms) < 0) return (-1); /* copy out fields in the current userland<->library interface */ ds->pm_intr_ignored = gms.pm_intr_ignored; ds->pm_intr_processed = gms.pm_intr_processed; ds->pm_intr_bufferfull = gms.pm_intr_bufferfull; ds->pm_syscalls = gms.pm_syscalls; ds->pm_syscall_errors = gms.pm_syscall_errors; ds->pm_buffer_requests = gms.pm_buffer_requests; ds->pm_buffer_requests_failed = gms.pm_buffer_requests_failed; ds->pm_log_sweeps = gms.pm_log_sweeps; return (0); } int pmc_get_msr(pmc_id_t pmc, uint32_t *msr) { struct pmc_op_getmsr gm; gm.pm_pmcid = pmc; if (PMC_CALL(PMCGETMSR, &gm) < 0) return (-1); *msr = gm.pm_msr; return (0); } int pmc_init(void) { int error, pmc_mod_id; unsigned int n; uint32_t abi_version; struct module_stat pmc_modstat; struct pmc_op_getcpuinfo op_cpu_info; #if defined(__amd64__) || defined(__i386__) int cpu_has_iaf_counters; unsigned int t; #endif if (pmc_syscall != -1) /* already inited */ return (0); /* retrieve the system call number from the KLD */ if ((pmc_mod_id = modfind(PMC_MODULE_NAME)) < 0) return (-1); pmc_modstat.version = sizeof(struct module_stat); if ((error = modstat(pmc_mod_id, &pmc_modstat)) < 0) return (-1); pmc_syscall = pmc_modstat.data.intval; /* check the kernel module's ABI against our compiled-in version */ abi_version = PMC_VERSION; if (PMC_CALL(GETMODULEVERSION, &abi_version) < 0) return (pmc_syscall = -1); /* ignore patch & minor numbers for the comparison */ if ((abi_version & 0xFF000000) != (PMC_VERSION & 0xFF000000)) { errno = EPROGMISMATCH; return (pmc_syscall = -1); } bzero(&op_cpu_info, sizeof(op_cpu_info)); if (PMC_CALL(GETCPUINFO, &op_cpu_info) < 0) return (pmc_syscall = -1); cpu_info.pm_cputype = op_cpu_info.pm_cputype; cpu_info.pm_ncpu = op_cpu_info.pm_ncpu; cpu_info.pm_npmc = op_cpu_info.pm_npmc; cpu_info.pm_nclass = op_cpu_info.pm_nclass; for (n = 0; n < op_cpu_info.pm_nclass; n++) memcpy(&cpu_info.pm_classes[n], &op_cpu_info.pm_classes[n], sizeof(cpu_info.pm_classes[n])); pmc_class_table = malloc(PMC_CLASS_TABLE_SIZE * sizeof(struct pmc_class_descr *)); if (pmc_class_table == NULL) return (-1); for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) pmc_class_table[n] = NULL; /* * Get soft events list. */ soft_event_info.pm_class = PMC_CLASS_SOFT; if (PMC_CALL(GETDYNEVENTINFO, &soft_event_info) < 0) return (pmc_syscall = -1); /* Map soft events to static list. */ for (n = 0; n < soft_event_info.pm_nevent; n++) { soft_event_table[n].pm_ev_name = soft_event_info.pm_events[n].pm_ev_name; soft_event_table[n].pm_ev_code = soft_event_info.pm_events[n].pm_ev_code; } soft_class_table_descr.pm_evc_event_table_size = \ soft_event_info.pm_nevent; soft_class_table_descr.pm_evc_event_table = \ soft_event_table; /* * Fill in the class table. */ n = 0; /* Fill soft events information. */ pmc_class_table[n++] = &soft_class_table_descr; #if defined(__amd64__) || defined(__i386__) if (cpu_info.pm_cputype != PMC_CPU_GENERIC) pmc_class_table[n++] = &tsc_class_table_descr; /* * Check if this CPU has fixed function counters. */ cpu_has_iaf_counters = 0; for (t = 0; t < cpu_info.pm_nclass; t++) if (cpu_info.pm_classes[t].pm_class == PMC_CLASS_IAF && cpu_info.pm_classes[t].pm_num > 0) cpu_has_iaf_counters = 1; #endif #define PMC_MDEP_INIT(C) do { \ pmc_mdep_event_aliases = C##_aliases; \ pmc_mdep_class_list = C##_pmc_classes; \ pmc_mdep_class_list_size = \ PMC_TABLE_SIZE(C##_pmc_classes); \ } while (0) #define PMC_MDEP_INIT_INTEL_V2(C) do { \ PMC_MDEP_INIT(C); \ pmc_class_table[n++] = &iaf_class_table_descr; \ if (!cpu_has_iaf_counters) \ pmc_mdep_event_aliases = \ C##_aliases_without_iaf; \ pmc_class_table[n] = &C##_class_table_descr; \ } while (0) /* Configure the event name parser. */ switch (cpu_info.pm_cputype) { #if defined(__amd64__) || defined(__i386__) case PMC_CPU_AMD_K8: PMC_MDEP_INIT(k8); pmc_class_table[n] = &k8_class_table_descr; break; #endif case PMC_CPU_GENERIC: PMC_MDEP_INIT(generic); break; #if defined(__arm__) #if defined(__XSCALE__) case PMC_CPU_INTEL_XSCALE: PMC_MDEP_INIT(xscale); pmc_class_table[n] = &xscale_class_table_descr; break; #endif case PMC_CPU_ARMV7_CORTEX_A8: PMC_MDEP_INIT(cortex_a8); pmc_class_table[n] = &cortex_a8_class_table_descr; break; case PMC_CPU_ARMV7_CORTEX_A9: PMC_MDEP_INIT(cortex_a9); pmc_class_table[n] = &cortex_a9_class_table_descr; break; #endif #if defined(__aarch64__) case PMC_CPU_ARMV8_CORTEX_A53: PMC_MDEP_INIT(cortex_a53); pmc_class_table[n] = &cortex_a53_class_table_descr; break; case PMC_CPU_ARMV8_CORTEX_A57: PMC_MDEP_INIT(cortex_a57); pmc_class_table[n] = &cortex_a57_class_table_descr; break; #endif #if defined(__mips__) case PMC_CPU_MIPS_24K: PMC_MDEP_INIT(mips24k); pmc_class_table[n] = &mips24k_class_table_descr; break; case PMC_CPU_MIPS_74K: PMC_MDEP_INIT(mips74k); pmc_class_table[n] = &mips74k_class_table_descr; break; case PMC_CPU_MIPS_OCTEON: PMC_MDEP_INIT(octeon); pmc_class_table[n] = &octeon_class_table_descr; break; #endif /* __mips__ */ #if defined(__powerpc__) case PMC_CPU_PPC_7450: PMC_MDEP_INIT(ppc7450); pmc_class_table[n] = &ppc7450_class_table_descr; break; case PMC_CPU_PPC_970: PMC_MDEP_INIT(ppc970); pmc_class_table[n] = &ppc970_class_table_descr; break; case PMC_CPU_PPC_E500: PMC_MDEP_INIT(e500); pmc_class_table[n] = &e500_class_table_descr; break; #endif default: /* * Some kind of CPU this version of the library knows nothing * about. This shouldn't happen since the abi version check * should have caught this. */ #if defined(__amd64__) || defined(__i386__) break; #endif errno = ENXIO; return (pmc_syscall = -1); } return (0); } const char * pmc_name_of_capability(enum pmc_caps cap) { int i; /* * 'cap' should have a single bit set and should be in * range. */ if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST || cap > PMC_CAP_LAST) { errno = EINVAL; return (NULL); } i = ffs(cap); return (pmc_capability_names[i - 1]); } const char * pmc_name_of_class(enum pmc_class pc) { size_t n; for (n = 0; n < PMC_TABLE_SIZE(pmc_class_names); n++) if (pc == pmc_class_names[n].pm_class) return (pmc_class_names[n].pm_name); errno = EINVAL; return (NULL); } const char * pmc_name_of_cputype(enum pmc_cputype cp) { size_t n; for (n = 0; n < PMC_TABLE_SIZE(pmc_cputype_names); n++) if (cp == pmc_cputype_names[n].pm_cputype) return (pmc_cputype_names[n].pm_name); errno = EINVAL; return (NULL); } const char * pmc_name_of_disposition(enum pmc_disp pd) { if ((int) pd >= PMC_DISP_FIRST && pd <= PMC_DISP_LAST) return (pmc_disposition_names[pd]); errno = EINVAL; return (NULL); } const char * _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) { const struct pmc_event_descr *ev, *evfence; ev = evfence = NULL; if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) { ev = k8_event_table; evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8); } else if (pe >= PMC_EV_XSCALE_FIRST && pe <= PMC_EV_XSCALE_LAST) { ev = xscale_event_table; evfence = xscale_event_table + PMC_EVENT_TABLE_SIZE(xscale); } else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) { switch (cpu) { case PMC_CPU_ARMV7_CORTEX_A8: ev = cortex_a8_event_table; evfence = cortex_a8_event_table + PMC_EVENT_TABLE_SIZE(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: ev = cortex_a9_event_table; evfence = cortex_a9_event_table + PMC_EVENT_TABLE_SIZE(cortex_a9); break; default: /* Unknown CPU type. */ break; } } else if (pe >= PMC_EV_ARMV8_FIRST && pe <= PMC_EV_ARMV8_LAST) { switch (cpu) { case PMC_CPU_ARMV8_CORTEX_A53: ev = cortex_a53_event_table; evfence = cortex_a53_event_table + PMC_EVENT_TABLE_SIZE(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: ev = cortex_a57_event_table; evfence = cortex_a57_event_table + PMC_EVENT_TABLE_SIZE(cortex_a57); break; default: /* Unknown CPU type. */ break; } } else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) { ev = mips24k_event_table; evfence = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k); } else if (pe >= PMC_EV_MIPS74K_FIRST && pe <= PMC_EV_MIPS74K_LAST) { ev = mips74k_event_table; evfence = mips74k_event_table + PMC_EVENT_TABLE_SIZE(mips74k); } else if (pe >= PMC_EV_OCTEON_FIRST && pe <= PMC_EV_OCTEON_LAST) { ev = octeon_event_table; evfence = octeon_event_table + PMC_EVENT_TABLE_SIZE(octeon); } else if (pe >= PMC_EV_PPC7450_FIRST && pe <= PMC_EV_PPC7450_LAST) { ev = ppc7450_event_table; evfence = ppc7450_event_table + PMC_EVENT_TABLE_SIZE(ppc7450); } else if (pe >= PMC_EV_PPC970_FIRST && pe <= PMC_EV_PPC970_LAST) { ev = ppc970_event_table; evfence = ppc970_event_table + PMC_EVENT_TABLE_SIZE(ppc970); } else if (pe >= PMC_EV_E500_FIRST && pe <= PMC_EV_E500_LAST) { ev = e500_event_table; evfence = e500_event_table + PMC_EVENT_TABLE_SIZE(e500); } else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); } else if ((int)pe >= PMC_EV_SOFT_FIRST && (int)pe <= PMC_EV_SOFT_LAST) { ev = soft_event_table; evfence = soft_event_table + soft_event_info.pm_nevent; } for (; ev != evfence; ev++) if (pe == ev->pm_ev_code) return (ev->pm_ev_name); return (NULL); } const char * pmc_name_of_event(enum pmc_event pe) { const char *n; if ((n = _pmc_name_of_event(pe, cpu_info.pm_cputype)) != NULL) return (n); errno = EINVAL; return (NULL); } const char * pmc_name_of_mode(enum pmc_mode pm) { if ((int) pm >= PMC_MODE_FIRST && pm <= PMC_MODE_LAST) return (pmc_mode_names[pm]); errno = EINVAL; return (NULL); } const char * pmc_name_of_state(enum pmc_state ps) { if ((int) ps >= PMC_STATE_FIRST && ps <= PMC_STATE_LAST) return (pmc_state_names[ps]); errno = EINVAL; return (NULL); } int pmc_ncpu(void) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } return (cpu_info.pm_ncpu); } int pmc_npmc(int cpu) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) { errno = EINVAL; return (-1); } return (cpu_info.pm_npmc); } int pmc_pmcinfo(int cpu, struct pmc_pmcinfo **ppmci) { int nbytes, npmc; struct pmc_op_getpmcinfo *pmci; if ((npmc = pmc_npmc(cpu)) < 0) return (-1); nbytes = sizeof(struct pmc_op_getpmcinfo) + npmc * sizeof(struct pmc_info); if ((pmci = calloc(1, nbytes)) == NULL) return (-1); pmci->pm_cpu = cpu; if (PMC_CALL(GETPMCINFO, pmci) < 0) { free(pmci); return (-1); } /* kernel<->library, library<->userland interfaces are identical */ *ppmci = (struct pmc_pmcinfo *) pmci; return (0); } int pmc_read(pmc_id_t pmc, pmc_value_t *value) { struct pmc_op_pmcrw pmc_read_op; pmc_read_op.pm_pmcid = pmc; pmc_read_op.pm_flags = PMC_F_OLDVALUE; pmc_read_op.pm_value = -1; if (PMC_CALL(PMCRW, &pmc_read_op) < 0) return (-1); *value = pmc_read_op.pm_value; return (0); } int pmc_release(pmc_id_t pmc) { struct pmc_op_simple pmc_release_args; pmc_release_args.pm_pmcid = pmc; return (PMC_CALL(PMCRELEASE, &pmc_release_args)); } int pmc_rw(pmc_id_t pmc, pmc_value_t newvalue, pmc_value_t *oldvaluep) { struct pmc_op_pmcrw pmc_rw_op; pmc_rw_op.pm_pmcid = pmc; pmc_rw_op.pm_flags = PMC_F_NEWVALUE | PMC_F_OLDVALUE; pmc_rw_op.pm_value = newvalue; if (PMC_CALL(PMCRW, &pmc_rw_op) < 0) return (-1); *oldvaluep = pmc_rw_op.pm_value; return (0); } int pmc_set(pmc_id_t pmc, pmc_value_t value) { struct pmc_op_pmcsetcount sc; sc.pm_pmcid = pmc; sc.pm_count = value; if (PMC_CALL(PMCSETCOUNT, &sc) < 0) return (-1); return (0); } int pmc_start(pmc_id_t pmc) { struct pmc_op_simple pmc_start_args; pmc_start_args.pm_pmcid = pmc; return (PMC_CALL(PMCSTART, &pmc_start_args)); } int pmc_stop(pmc_id_t pmc) { struct pmc_op_simple pmc_stop_args; pmc_stop_args.pm_pmcid = pmc; return (PMC_CALL(PMCSTOP, &pmc_stop_args)); } int pmc_width(pmc_id_t pmcid, uint32_t *width) { unsigned int i; enum pmc_class cl; cl = PMC_ID_TO_CLASS(pmcid); for (i = 0; i < cpu_info.pm_nclass; i++) if (cpu_info.pm_classes[i].pm_class == cl) { *width = cpu_info.pm_classes[i].pm_width; return (0); } errno = EINVAL; return (-1); } int pmc_write(pmc_id_t pmc, pmc_value_t value) { struct pmc_op_pmcrw pmc_write_op; pmc_write_op.pm_pmcid = pmc; pmc_write_op.pm_flags = PMC_F_NEWVALUE; pmc_write_op.pm_value = value; return (PMC_CALL(PMCRW, &pmc_write_op)); } int pmc_writelog(uint32_t userdata) { struct pmc_op_writelog wl; wl.pm_userdata = userdata; return (PMC_CALL(WRITELOG, &wl)); } diff --git a/lib/libpmc/libpmc_pmu_util.c b/lib/libpmc/libpmc_pmu_util.c index dbcf355aa394..730954d8e23a 100644 --- a/lib/libpmc/libpmc_pmu_util.c +++ b/lib/libpmc/libpmc_pmu_util.c @@ -1,490 +1,490 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include #include "pmu-events/pmu-events.h" #if defined(__amd64__) || defined(__i386__) struct pmu_alias { const char *pa_alias; const char *pa_name; }; static struct pmu_alias pmu_alias_table[] = { {"UNHALTED_CORE_CYCLES", "CPU_CLK_UNHALTED.THREAD_P_ANY"}, {"UNHALTED-CORE-CYCLES", "CPU_CLK_UNHALTED.THREAD_P_ANY"}, {"LLC_MISSES", "LONGEST_LAT_CACHE.MISS"}, {"LLC-MISSES", "LONGEST_LAT_CACHE.MISS"}, {"LLC_REFERENCE", "LONGEST_LAT_CACHE.REFERENCE"}, {"LLC-REFERENCE", "LONGEST_LAT_CACHE.REFERENCE"}, {"LLC_MISS_RHITM", "mem_load_l3_miss_retired.remote_hitm"}, {"LLC-MISS-RHITM", "mem_load_l3_miss_retired.remote_hitm"}, {"RESOURCE_STALL", "RESOURCE_STALLS.ANY"}, {"RESOURCE_STALLS_ANY", "RESOURCE_STALLS.ANY"}, {"BRANCH_INSTRUCTION_RETIRED", "BR_INST_RETIRED.ALL_BRANCHES"}, {"BRANCH-INSTRUCTION-RETIRED", "BR_INST_RETIRED.ALL_BRANCHES"}, {"BRANCH_MISSES_RETIRED", "BR_MISP_RETIRED.ALL_BRANCHES"}, {"BRANCH-MISSES-RETIRED", "BR_MISP_RETIRED.ALL_BRANCHES"}, {"cycles", "tsc-tsc"}, {"instructions", "inst-retired.any_p"}, {"branch-mispredicts", "br_misp_retired.all_branches"}, {"branches", "br_inst_retired.all_branches"}, {"interrupts", "hw_interrupts.received"}, {"ic-misses", "frontend_retired.l1i_miss"}, {NULL, NULL}, }; /* * The Intel fixed mode counters are: * "inst_retired.any", * "cpu_clk_unhalted.thread", * "cpu_clk_unhalted.thread_any", * "cpu_clk_unhalted.ref_tsc", * */ static const char * pmu_alias_get(const char *name) { struct pmu_alias *pa; for (pa = pmu_alias_table; pa->pa_alias != NULL; pa++) if (strcasecmp(name, pa->pa_alias) == 0) return (pa->pa_name); return (name); } struct pmu_event_desc { uint64_t ped_period; uint64_t ped_offcore_rsp; uint32_t ped_event; uint32_t ped_frontend; uint32_t ped_ldlat; uint32_t ped_config1; int16_t ped_umask; uint8_t ped_cmask; uint8_t ped_any; uint8_t ped_inv; uint8_t ped_edge; uint8_t ped_fc_mask; uint8_t ped_ch_mask; }; static const struct pmu_events_map * pmu_events_map_get(const char *cpuid) { size_t s; char buf[64]; const struct pmu_events_map *pme; if (cpuid != NULL) { memcpy(buf, cpuid, 64); } else { if (sysctlbyname("kern.hwpmc.cpuid", (void *)NULL, &s, (void *)NULL, 0) == -1) return (NULL); if (sysctlbyname("kern.hwpmc.cpuid", buf, &s, (void *)NULL, 0) == -1) return (NULL); } for (pme = pmu_events_map; pme->cpuid != NULL; pme++) if (strcmp(buf, pme->cpuid) == 0) return (pme); return (NULL); } static const struct pmu_event * pmu_event_get(const char *cpuid, const char *event_name, int *idx) { const struct pmu_events_map *pme; const struct pmu_event *pe; int i; if ((pme = pmu_events_map_get(cpuid)) == NULL) return (NULL); for (i = 0, pe = pme->table; pe->name || pe->desc || pe->event; pe++, i++) { if (pe->name == NULL) continue; if (strcasecmp(pe->name, event_name) == 0) { if (idx) *idx = i; return (pe); } } return (NULL); } int pmc_pmu_idx_get_by_event(const char *cpuid, const char *event) { int idx; const char *realname; realname = pmu_alias_get(event); if (pmu_event_get(cpuid, realname, &idx) == NULL) return (-1); return (idx); } const char * -pmc_pmu_event_get_by_idx(int idx) +pmc_pmu_event_get_by_idx(const char *cpuid, int idx) { const struct pmu_events_map *pme; const struct pmu_event *pe; int i; - if ((pme = pmu_events_map_get(NULL)) == NULL) + if ((pme = pmu_events_map_get(cpuid)) == NULL) return (NULL); for (i = 0, pe = pme->table; (pe->name || pe->desc || pe->event) && i < idx; pe++, i++); return (pe->name); } static int pmu_parse_event(struct pmu_event_desc *ped, const char *eventin) { char *event; char *kvp, *key, *value, *r; char *debug; if ((event = strdup(eventin)) == NULL) return (ENOMEM); r = event; bzero(ped, sizeof(*ped)); ped->ped_umask = -1; while ((kvp = strsep(&event, ",")) != NULL) { key = strsep(&kvp, "="); if (key == NULL) abort(); value = kvp; if (strcmp(key, "umask") == 0) ped->ped_umask = strtol(value, NULL, 16); else if (strcmp(key, "event") == 0) ped->ped_event = strtol(value, NULL, 16); else if (strcmp(key, "period") == 0) ped->ped_period = strtol(value, NULL, 10); else if (strcmp(key, "offcore_rsp") == 0) ped->ped_offcore_rsp = strtol(value, NULL, 16); else if (strcmp(key, "any") == 0) ped->ped_any = strtol(value, NULL, 10); else if (strcmp(key, "cmask") == 0) ped->ped_cmask = strtol(value, NULL, 10); else if (strcmp(key, "inv") == 0) ped->ped_inv = strtol(value, NULL, 10); else if (strcmp(key, "edge") == 0) ped->ped_edge = strtol(value, NULL, 10); else if (strcmp(key, "frontend") == 0) ped->ped_frontend = strtol(value, NULL, 16); else if (strcmp(key, "ldlat") == 0) ped->ped_ldlat = strtol(value, NULL, 16); else if (strcmp(key, "fc_mask") == 0) ped->ped_fc_mask = strtol(value, NULL, 16); else if (strcmp(key, "ch_mask") == 0) ped->ped_ch_mask = strtol(value, NULL, 16); else if (strcmp(key, "config1") == 0) ped->ped_config1 = strtol(value, NULL, 16); else { debug = getenv("PMUDEBUG"); if (debug != NULL && strcmp(debug, "true") == 0 && value != NULL) printf("unrecognized kvpair: %s:%s\n", key, value); } } free(r); return (0); } uint64_t pmc_pmu_sample_rate_get(const char *event_name) { const struct pmu_event *pe; struct pmu_event_desc ped; event_name = pmu_alias_get(event_name); if ((pe = pmu_event_get(NULL, event_name, NULL)) == NULL) return (DEFAULT_SAMPLE_COUNT); if (pe->alias && (pe = pmu_event_get(NULL, pe->alias, NULL)) == NULL) return (DEFAULT_SAMPLE_COUNT); if (pe->event == NULL) return (DEFAULT_SAMPLE_COUNT); if (pmu_parse_event(&ped, pe->event)) return (DEFAULT_SAMPLE_COUNT); return (ped.ped_period); } int pmc_pmu_enabled(void) { return (pmu_events_map_get(NULL) != NULL); } void pmc_pmu_print_counters(const char *event_name) { const struct pmu_events_map *pme; const struct pmu_event *pe; struct pmu_event_desc ped; char *debug; int do_debug; debug = getenv("PMUDEBUG"); do_debug = 0; if (debug != NULL && strcmp(debug, "true") == 0) do_debug = 1; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (event_name != NULL && strcasestr(pe->name, event_name) == NULL) continue; printf("\t%s\n", pe->name); if (do_debug) pmu_parse_event(&ped, pe->event); } } void pmc_pmu_print_counter_desc(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) != NULL && pe->desc != NULL) printf("%s:\t%s\n", pe->name, pe->desc); } } void pmc_pmu_print_counter_desc_long(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) != NULL) { if (pe->long_desc != NULL) printf("%s:\n%s\n", pe->name, pe->long_desc); else if (pe->desc != NULL) printf("%s:\t%s\n", pe->name, pe->desc); } } } void pmc_pmu_print_counter_full(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) == NULL) continue; printf("name: %s\n", pe->name); if (pe->long_desc != NULL) printf("desc: %s\n", pe->long_desc); else if (pe->desc != NULL) printf("desc: %s\n", pe->desc); if (pe->event != NULL) printf("event: %s\n", pe->event); if (pe->topic != NULL) printf("topic: %s\n", pe->topic); if (pe->pmu != NULL) printf("pmu: %s\n", pe->pmu); if (pe->unit != NULL) printf("unit: %s\n", pe->unit); if (pe->perpkg != NULL) printf("perpkg: %s\n", pe->perpkg); if (pe->metric_expr != NULL) printf("metric_expr: %s\n", pe->metric_expr); if (pe->metric_name != NULL) printf("metric_name: %s\n", pe->metric_name); if (pe->metric_group != NULL) printf("metric_group: %s\n", pe->metric_group); } } int pmc_pmu_pmcallocate(const char *event_name, struct pmc_op_pmcallocate *pm) { const struct pmu_event *pe; struct pmu_event_desc ped; struct pmc_md_iap_op_pmcallocate *iap; int idx, isfixed; iap = &pm->pm_md.pm_iap; isfixed = 0; bzero(iap, sizeof(*iap)); event_name = pmu_alias_get(event_name); pm->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); if ((pe = pmu_event_get(NULL, event_name, &idx)) == NULL) return (ENOENT); if (pe->alias && (pe = pmu_event_get(NULL, pe->alias, &idx)) == NULL) return (ENOENT); if (pe->event == NULL) return (ENOENT); if (pmu_parse_event(&ped, pe->event)) return (ENOENT); if (strcasestr(event_name, "UNC_") == event_name || strcasestr(event_name, "uncore") != NULL) { pm->pm_class = PMC_CLASS_UCP; pm->pm_caps |= PMC_CAP_QUALIFIER; } else if ((ped.ped_umask == -1) || (ped.ped_event == 0x0 && ped.ped_umask == 0x3)) { pm->pm_class = PMC_CLASS_IAF; } else { pm->pm_class = PMC_CLASS_IAP; pm->pm_caps |= PMC_CAP_QUALIFIER; } pm->pm_ev = idx; iap->pm_iap_config |= IAP_EVSEL(ped.ped_event); if (ped.ped_umask > 0) iap->pm_iap_config |= IAP_UMASK(ped.ped_umask); iap->pm_iap_config |= IAP_CMASK(ped.ped_cmask); iap->pm_iap_rsp = ped.ped_offcore_rsp; iap->pm_iap_config |= (IAP_USR | IAP_OS); if (ped.ped_edge) iap->pm_iap_config |= IAP_EDGE; if (ped.ped_any) iap->pm_iap_config |= IAP_ANY; if (ped.ped_inv) iap->pm_iap_config |= IAP_EDGE; if (pm->pm_caps & PMC_CAP_INTERRUPT) iap->pm_iap_config |= IAP_INT; return (0); } /* * Ultimately rely on AMD calling theirs the same */ static const char *stat_mode_cntrs[] = { "cpu_clk_unhalted.thread", "inst_retired.any", "br_inst_retired.all_branches", "br_misp_retired.all_branches", "longest_lat_cache.reference", "longest_lat_cache.miss", }; int pmc_pmu_stat_mode(const char ***cntrs) { if (pmc_pmu_enabled()) { *cntrs = stat_mode_cntrs; return (0); } return (EOPNOTSUPP); } #else uint64_t pmc_pmu_sample_rate_get(const char *event_name __unused) { return (DEFAULT_SAMPLE_COUNT); } void pmc_pmu_print_counters(const char *event_name __unused) { } void pmc_pmu_print_counter_desc(const char *e __unused) { } void pmc_pmu_print_counter_desc_long(const char *e __unused) { } void pmc_pmu_print_counter_full(const char *e __unused) { } int pmc_pmu_enabled(void) { return (0); } int pmc_pmu_pmcallocate(const char *e __unused, struct pmc_op_pmcallocate *p __unused) { return (EOPNOTSUPP); } const char * -pmc_pmu_event_get_by_idx(int idx __unused) +pmc_pmu_event_get_by_idx(const char *c __unused, int idx __unused) { return (NULL); } int pmc_pmu_stat_mode(const char ***a __unused) { return (EOPNOTSUPP); } int pmc_pmu_idx_get_by_event(const char *c __unused, const char *e __unused) { return (-1); } #endif diff --git a/lib/libpmc/pmc.h b/lib/libpmc/pmc.h index 476f86fa9961..9d12085364e5 100644 --- a/lib/libpmc/pmc.h +++ b/lib/libpmc/pmc.h @@ -1,128 +1,128 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003,2004 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _PMC_H_ #define _PMC_H_ #include #include #include /* * Driver statistics. */ struct pmc_driverstats { unsigned int pm_intr_ignored; /* #interrupts ignored */ unsigned int pm_intr_processed; /* #interrupts processed */ unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ unsigned int pm_syscalls; /* #syscalls */ unsigned int pm_syscall_errors; /* #syscalls with errors */ unsigned int pm_buffer_requests; /* #buffer requests */ unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ unsigned int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * CPU information. */ struct pmc_cpuinfo { enum pmc_cputype pm_cputype; /* the kind of CPU */ uint32_t pm_ncpu; /* number of CPUs */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * Current PMC state. */ struct pmc_pmcinfo { int32_t pm_cpu; /* CPU number */ struct pmc_info pm_pmcs[]; /* NPMC structs */ }; /* * Prototypes */ __BEGIN_DECLS int pmc_allocate(const char *_ctrspec, enum pmc_mode _mode, uint32_t _flags, - int _cpu, pmc_id_t *_pmcid); + int _cpu, pmc_id_t *_pmcid, uint64_t count); int pmc_attach(pmc_id_t _pmcid, pid_t _pid); int pmc_capabilities(pmc_id_t _pmc, uint32_t *_caps); int pmc_configure_logfile(int _fd); int pmc_flush_logfile(void); int pmc_close_logfile(void); int pmc_detach(pmc_id_t _pmcid, pid_t _pid); int pmc_disable(int _cpu, int _pmc); int pmc_enable(int _cpu, int _pmc); int pmc_get_driver_stats(struct pmc_driverstats *_gms); int pmc_get_msr(pmc_id_t _pmc, uint32_t *_msr); int pmc_init(void); int pmc_read(pmc_id_t _pmc, pmc_value_t *_value); int pmc_release(pmc_id_t _pmc); int pmc_rw(pmc_id_t _pmc, pmc_value_t _newvalue, pmc_value_t *_oldvalue); int pmc_set(pmc_id_t _pmc, pmc_value_t _value); int pmc_start(pmc_id_t _pmc); int pmc_stop(pmc_id_t _pmc); int pmc_width(pmc_id_t _pmc, uint32_t *_width); int pmc_write(pmc_id_t _pmc, pmc_value_t _value); int pmc_writelog(uint32_t _udata); int pmc_ncpu(void); int pmc_npmc(int _cpu); int pmc_cpuinfo(const struct pmc_cpuinfo **_cpu_info); int pmc_pmcinfo(int _cpu, struct pmc_pmcinfo **_pmc_info); const char *pmc_name_of_capability(enum pmc_caps _c); const char *pmc_name_of_class(enum pmc_class _pc); const char *pmc_name_of_cputype(enum pmc_cputype _cp); const char *pmc_name_of_disposition(enum pmc_disp _pd); const char *pmc_name_of_event(enum pmc_event _pe); const char *pmc_name_of_mode(enum pmc_mode _pm); const char *pmc_name_of_state(enum pmc_state _ps); int pmc_event_names_of_class(enum pmc_class _cl, const char ***_eventnames, int *_nevents); int pmc_pmu_enabled(void); void pmc_pmu_print_counters(const char *); void pmc_pmu_print_counter_desc(const char *); void pmc_pmu_print_counter_desc_long(const char *); void pmc_pmu_print_counter_full(const char *); uint64_t pmc_pmu_sample_rate_get(const char *); int pmc_pmu_pmcallocate(const char *, struct pmc_op_pmcallocate *); -const char *pmc_pmu_event_get_by_idx(int idx); +const char *pmc_pmu_event_get_by_idx(const char *, int idx); int pmc_pmu_idx_get_by_event(const char*, const char *); int pmc_pmu_stat_mode(const char ***); __END_DECLS #endif diff --git a/lib/libpmc/pmclog.c b/lib/libpmc/pmclog.c index cfa11118c60c..2df9d97d1e6a 100644 --- a/lib/libpmc/pmclog.c +++ b/lib/libpmc/pmclog.c @@ -1,589 +1,593 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpmcinternal.h" #define PMCLOG_BUFFER_SIZE 4096 /* * API NOTES * * The pmclog(3) API is oriented towards parsing an event stream in * "realtime", i.e., from an data source that may or may not preserve * record boundaries -- for example when the data source is elsewhere * on a network. The API allows data to be fed into the parser zero * or more bytes at a time. * * The state for a log file parser is maintained in a 'struct * pmclog_parse_state'. Parser invocations are done by calling * 'pmclog_read()'; this function will inform the caller when a * complete event is parsed. * * The parser first assembles a complete log file event in an internal * work area (see "ps_saved" below). Once a complete log file event * is read, the parser then parses it and converts it to an event * descriptor usable by the client. We could possibly avoid this two * step process by directly parsing the input log to set fields in the * event record. However the parser's state machine would get * insanely complicated, and this code is unlikely to be used in * performance critical paths. */ #define PMCLOG_HEADER_FROM_SAVED_STATE(PS) \ (* ((uint32_t *) &(PS)->ps_saved)) #define PMCLOG_INITIALIZE_READER(LE,A) LE = (uint32_t *) &(A) #define PMCLOG_READ32(LE,V) do { \ (V) = *(LE)++; \ } while (0) #define PMCLOG_READ64(LE,V) do { \ uint64_t _v; \ _v = (uint64_t) *(LE)++; \ _v |= ((uint64_t) *(LE)++) << 32; \ (V) = _v; \ } while (0) #define PMCLOG_READSTRING(LE,DST,LEN) strlcpy((DST), (char *) (LE), (LEN)) /* * Assemble a log record from '*len' octets starting from address '*data'. * Update 'data' and 'len' to reflect the number of bytes consumed. * * '*data' is potentially an unaligned address and '*len' octets may * not be enough to complete a event record. */ static enum pmclog_parser_state pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len) { int avail, copylen, recordsize, used; uint32_t h; const int HEADERSIZE = sizeof(uint32_t); char *src, *dst; if ((avail = *len) <= 0) return (ps->ps_state = PL_STATE_ERROR); src = *data; used = 0; if (ps->ps_state == PL_STATE_NEW_RECORD) ps->ps_svcount = 0; dst = (char *) &ps->ps_saved + ps->ps_svcount; switch (ps->ps_state) { case PL_STATE_NEW_RECORD: /* * Transitions: * * Case A: avail < headersize * -> 'expecting header' * * Case B: avail >= headersize * B.1: avail < recordsize * -> 'partial record' * B.2: avail >= recordsize * -> 'new record' */ copylen = avail < HEADERSIZE ? avail : HEADERSIZE; bcopy(src, dst, copylen); ps->ps_svcount = used = copylen; if (copylen < HEADERSIZE) { ps->ps_state = PL_STATE_EXPECTING_HEADER; goto done; } src += copylen; dst += copylen; h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); recordsize = PMCLOG_HEADER_TO_LENGTH(h); if (recordsize <= 0) goto error; if (recordsize <= avail) { /* full record available */ bcopy(src, dst, recordsize - copylen); ps->ps_svcount = used = recordsize; goto done; } /* header + a partial record is available */ bcopy(src, dst, avail - copylen); ps->ps_svcount = used = avail; ps->ps_state = PL_STATE_PARTIAL_RECORD; break; case PL_STATE_EXPECTING_HEADER: /* * Transitions: * * Case C: avail+saved < headersize * -> 'expecting header' * * Case D: avail+saved >= headersize * D.1: avail+saved < recordsize * -> 'partial record' * D.2: avail+saved >= recordsize * -> 'new record' * (see PARTIAL_RECORD handling below) */ if (avail + ps->ps_svcount < HEADERSIZE) { bcopy(src, dst, avail); ps->ps_svcount += avail; used = avail; break; } used = copylen = HEADERSIZE - ps->ps_svcount; bcopy(src, dst, copylen); src += copylen; dst += copylen; avail -= copylen; ps->ps_svcount += copylen; /*FALLTHROUGH*/ case PL_STATE_PARTIAL_RECORD: /* * Transitions: * * Case E: avail+saved < recordsize * -> 'partial record' * * Case F: avail+saved >= recordsize * -> 'new record' */ h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); recordsize = PMCLOG_HEADER_TO_LENGTH(h); if (recordsize <= 0) goto error; if (avail + ps->ps_svcount < recordsize) { copylen = avail; ps->ps_state = PL_STATE_PARTIAL_RECORD; } else { copylen = recordsize - ps->ps_svcount; ps->ps_state = PL_STATE_NEW_RECORD; } bcopy(src, dst, copylen); ps->ps_svcount += copylen; used += copylen; break; default: goto error; } done: *data += used; *len -= used; return ps->ps_state; error: ps->ps_state = PL_STATE_ERROR; return ps->ps_state; } /* * Get an event from the stream pointed to by '*data'. '*len' * indicates the number of bytes available to parse. Arguments * '*data' and '*len' are updated to indicate the number of bytes * consumed. */ static int pmclog_get_event(void *cookie, char **data, ssize_t *len, struct pmclog_ev *ev) { int evlen, pathlen; uint32_t h, *le, npc, noop; enum pmclog_parser_state e; struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; assert(ps->ps_state != PL_STATE_ERROR); if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) { ev->pl_state = PMCLOG_ERROR; printf("state error\n"); return -1; } if (e != PL_STATE_NEW_RECORD) { ev->pl_state = PMCLOG_REQUIRE_DATA; return -1; } PMCLOG_INITIALIZE_READER(le, ps->ps_saved); ev->pl_data = le; PMCLOG_READ32(le,h); if (!PMCLOG_HEADER_CHECK_MAGIC(h)) { printf("bad magic\n"); ps->ps_state = PL_STATE_ERROR; ev->pl_state = PMCLOG_ERROR; return -1; } /* copy out the time stamp */ PMCLOG_READ32(le,ev->pl_ts.tv_sec); PMCLOG_READ32(le,ev->pl_ts.tv_nsec); evlen = PMCLOG_HEADER_TO_LENGTH(h); #define PMCLOG_GET_PATHLEN(P,E,TYPE) do { \ (P) = (E) - offsetof(struct TYPE, pl_pathname); \ if ((P) > PATH_MAX || (P) < 0) \ goto error; \ } while (0) #define PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do { \ (SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc)) \ / sizeof(uintfptr_t); \ } while (0); switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) { case PMCLOG_TYPE_CALLCHAIN: PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_tid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags2); PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen); for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++) PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]); for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++) ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0; break; case PMCLOG_TYPE_CLOSELOG: ev->pl_state = PMCLOG_EOF; return (-1); case PMCLOG_TYPE_DROPNOTIFY: /* nothing to do */ break; case PMCLOG_TYPE_INITIALIZE: PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version); PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch); PMCLOG_READSTRING(le, ev->pl_u.pl_i.pl_cpuid, PMC_CPUID_LEN); memcpy(ev->pl_u.pl_i.pl_cpuid, le, PMC_CPUID_LEN); + ps->ps_cpuid = strdup(ev->pl_u.pl_i.pl_cpuid); ps->ps_version = ev->pl_u.pl_i.pl_version; ps->ps_arch = ev->pl_u.pl_i.pl_arch; ps->ps_initialized = 1; break; case PMCLOG_TYPE_MAP_IN: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in); PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid); PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start); PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen); break; case PMCLOG_TYPE_MAP_OUT: PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid); PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start); PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end); break; case PMCLOG_TYPE_PMCALLOCATE: PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event); PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags); - PMCLOG_READ32(le,noop); - ev->pl_u.pl_a.pl_evname = pmc_pmu_event_get_by_idx(ev->pl_u.pl_a.pl_event); + PMCLOG_READ64(le,ev->pl_u.pl_a.pl_rate); + ev->pl_u.pl_a.pl_evname = pmc_pmu_event_get_by_idx(ps->ps_cpuid, ev->pl_u.pl_a.pl_event); if (ev->pl_u.pl_a.pl_evname != NULL) break; else if ((ev->pl_u.pl_a.pl_evname = _pmc_name_of_event(ev->pl_u.pl_a.pl_event, ps->ps_arch)) == NULL) { printf("unknown event\n"); goto error; } break; case PMCLOG_TYPE_PMCALLOCATEDYN: PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_event); PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_flags); PMCLOG_READSTRING(le,ev->pl_u.pl_ad.pl_evname,PMC_NAME_MAX); break; case PMCLOG_TYPE_PMCATTACH: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach); PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid); PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen); break; case PMCLOG_TYPE_PMCDETACH: PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid); break; case PMCLOG_TYPE_PROCCSW: PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid); PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_tid); break; case PMCLOG_TYPE_PROCEXEC: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec); PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid); PMCLOG_READ32(le,noop); PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_entryaddr); PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen); break; case PMCLOG_TYPE_PROCEXIT: PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid); PMCLOG_READ32(le,noop); PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value); break; case PMCLOG_TYPE_PROCFORK: PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid); PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid); break; case PMCLOG_TYPE_SYSEXIT: PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid); break; case PMCLOG_TYPE_USERDATA: PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata); break; case PMCLOG_TYPE_THR_CREATE: PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_tid); PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_pid); - PMCLOG_READ32(le,noop); + PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_flags); memcpy(ev->pl_u.pl_tc.pl_tdname, le, MAXCOMLEN+1); break; case PMCLOG_TYPE_THR_EXIT: PMCLOG_READ32(le,ev->pl_u.pl_te.pl_tid); break; case PMCLOG_TYPE_PROC_CREATE: PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_pid); + PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_flags); + PMCLOG_READ32(le,noop); memcpy(ev->pl_u.pl_pc.pl_pcomm, le, MAXCOMLEN+1); break; default: /* unknown record type */ ps->ps_state = PL_STATE_ERROR; ev->pl_state = PMCLOG_ERROR; return (-1); } ev->pl_offset = (ps->ps_offset += evlen); ev->pl_count = (ps->ps_count += 1); ev->pl_len = evlen; ev->pl_state = PMCLOG_OK; return 0; error: ev->pl_state = PMCLOG_ERROR; ps->ps_state = PL_STATE_ERROR; return -1; } /* * Extract and return the next event from the byte stream. * * Returns 0 and sets the event's state to PMCLOG_OK in case an event * was successfully parsed. Otherwise this function returns -1 and * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if * a parse error was encountered. */ int pmclog_read(void *cookie, struct pmclog_ev *ev) { int retval; ssize_t nread; struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (ps->ps_state == PL_STATE_ERROR) { ev->pl_state = PMCLOG_ERROR; return -1; } /* * If there isn't enough data left for a new event try and get * more data. */ if (ps->ps_len == 0) { ev->pl_state = PMCLOG_REQUIRE_DATA; /* * If we have a valid file descriptor to read from, attempt * to read from that. This read may return with an error, * (which may be EAGAIN or other recoverable error), or * can return EOF. */ if (ps->ps_fd != PMCLOG_FD_NONE) { refill: nread = read(ps->ps_fd, ps->ps_buffer, PMCLOG_BUFFER_SIZE); if (nread <= 0) { if (nread == 0) ev->pl_state = PMCLOG_EOF; else if (errno != EAGAIN) /* not restartable */ ev->pl_state = PMCLOG_ERROR; return -1; } ps->ps_len = nread; ps->ps_data = ps->ps_buffer; } else { return -1; } } assert(ps->ps_len > 0); /* Retrieve one event from the byte stream. */ retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev); /* * If we need more data and we have a configured fd, try read * from it. */ if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA && ps->ps_fd != -1) { assert(ps->ps_len == 0); goto refill; } return retval; } /* * Feed data to a memory based parser. * * The memory area pointed to by 'data' needs to be valid till the * next error return from pmclog_next_event(). */ int pmclog_feed(void *cookie, char *data, int len) { struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (len < 0 || /* invalid length */ ps->ps_buffer || /* called for a file parser */ ps->ps_len != 0) /* unnecessary call */ return -1; ps->ps_data = data; ps->ps_len = len; return 0; } /* * Allocate and initialize parser state. */ void * pmclog_open(int fd) { struct pmclog_parse_state *ps; if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL) return NULL; ps->ps_state = PL_STATE_NEW_RECORD; ps->ps_arch = -1; ps->ps_initialized = 0; ps->ps_count = 0; ps->ps_offset = (off_t) 0; bzero(&ps->ps_saved, sizeof(ps->ps_saved)); + ps->ps_cpuid = NULL; ps->ps_svcount = 0; ps->ps_fd = fd; ps->ps_data = NULL; ps->ps_buffer = NULL; ps->ps_len = 0; /* allocate space for a work area */ if (ps->ps_fd != PMCLOG_FD_NONE) { if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) { free(ps); return NULL; } } return ps; } /* * Free up parser state. */ void pmclog_close(void *cookie) { struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (ps->ps_buffer) free(ps->ps_buffer); free(ps); } diff --git a/lib/libpmc/pmclog.h b/lib/libpmc/pmclog.h index 0f324ced6bc8..4293984ba476 100644 --- a/lib/libpmc/pmclog.h +++ b/lib/libpmc/pmclog.h @@ -1,227 +1,231 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _PMCLOG_H_ #define _PMCLOG_H_ #include #include enum pmclog_state { PMCLOG_OK, PMCLOG_EOF, PMCLOG_REQUIRE_DATA, PMCLOG_ERROR }; struct pmclog_ev_callchain { uint32_t pl_pid; uint32_t pl_tid; uint32_t pl_pmcid; uint32_t pl_cpuflags; uint32_t pl_cpuflags2; uint32_t pl_npc; uintfptr_t pl_pc[PMC_CALLCHAIN_DEPTH_MAX]; }; struct pmclog_ev_dropnotify { }; struct pmclog_ev_closelog { }; struct pmclog_ev_initialize { uint32_t pl_version; uint32_t pl_arch; char pl_cpuid[PATH_MAX]; }; struct pmclog_ev_map_in { pid_t pl_pid; uintfptr_t pl_start; char pl_pathname[PATH_MAX]; }; struct pmclog_ev_map_out { pid_t pl_pid; uintfptr_t pl_start; uintfptr_t pl_end; }; struct pmclog_ev_pcsample { uintfptr_t pl_pc; pid_t pl_pid; pid_t pl_tid; pmc_id_t pl_pmcid; uint32_t pl_flags; uint32_t pl_usermode; }; struct pmclog_ev_pmcallocate { - uint32_t pl_event; const char * pl_evname; + uint64_t pl_rate; + uint32_t pl_event; uint32_t pl_flags; pmc_id_t pl_pmcid; }; struct pmclog_ev_pmcallocatedyn { - uint32_t pl_event; char pl_evname[PMC_NAME_MAX]; + uint32_t pl_event; uint32_t pl_flags; pmc_id_t pl_pmcid; }; struct pmclog_ev_pmcattach { pmc_id_t pl_pmcid; pid_t pl_pid; char pl_pathname[PATH_MAX]; }; struct pmclog_ev_pmcdetach { pmc_id_t pl_pmcid; pid_t pl_pid; }; struct pmclog_ev_proccsw { pid_t pl_pid; pid_t pl_tid; pmc_id_t pl_pmcid; pmc_value_t pl_value; }; struct pmclog_ev_proccreate { pid_t pl_pid; + uint32_t pl_flags; char pl_pcomm[MAXCOMLEN+1]; }; struct pmclog_ev_procexec { pid_t pl_pid; pmc_id_t pl_pmcid; uintfptr_t pl_entryaddr; char pl_pathname[PATH_MAX]; }; struct pmclog_ev_procexit { uint32_t pl_pid; pmc_id_t pl_pmcid; pmc_value_t pl_value; }; struct pmclog_ev_procfork { pid_t pl_oldpid; pid_t pl_newpid; }; struct pmclog_ev_sysexit { pid_t pl_pid; }; struct pmclog_ev_threadcreate { pid_t pl_tid; pid_t pl_pid; + uint32_t pl_flags; char pl_tdname[MAXCOMLEN+1]; }; struct pmclog_ev_threadexit { pid_t pl_tid; }; struct pmclog_ev_userdata { uint32_t pl_userdata; }; struct pmclog_ev { enum pmclog_state pl_state; /* state after 'get_event()' */ off_t pl_offset; /* byte offset in stream */ size_t pl_count; /* count of records so far */ struct timespec pl_ts; /* log entry timestamp */ enum pmclog_type pl_type; /* type of log entry */ void *pl_data; int pl_len; union { /* log entry data */ struct pmclog_ev_callchain pl_cc; struct pmclog_ev_closelog pl_cl; struct pmclog_ev_dropnotify pl_dn; struct pmclog_ev_initialize pl_i; struct pmclog_ev_map_in pl_mi; struct pmclog_ev_map_out pl_mo; struct pmclog_ev_pmcallocate pl_a; struct pmclog_ev_pmcallocatedyn pl_ad; struct pmclog_ev_pmcattach pl_t; struct pmclog_ev_pmcdetach pl_d; struct pmclog_ev_proccsw pl_c; struct pmclog_ev_proccreate pl_pc; struct pmclog_ev_procexec pl_x; struct pmclog_ev_procexit pl_e; struct pmclog_ev_procfork pl_f; struct pmclog_ev_sysexit pl_se; struct pmclog_ev_threadcreate pl_tc; struct pmclog_ev_threadexit pl_te; struct pmclog_ev_userdata pl_u; } pl_u; }; enum pmclog_parser_state { PL_STATE_NEW_RECORD, /* in-between records */ PL_STATE_EXPECTING_HEADER, /* header being read */ PL_STATE_PARTIAL_RECORD, /* header present but not the record */ PL_STATE_ERROR /* parsing error encountered */ }; struct pmclog_parse_state { enum pmclog_parser_state ps_state; enum pmc_cputype ps_arch; /* log file architecture */ uint32_t ps_version; /* hwpmc version */ int ps_initialized; /* whether initialized */ int ps_count; /* count of records processed */ off_t ps_offset; /* stream byte offset */ union pmclog_entry ps_saved; /* saved partial log entry */ int ps_svcount; /* #bytes saved */ int ps_fd; /* active fd or -1 */ char *ps_buffer; /* scratch buffer if fd != -1 */ char *ps_data; /* current parse pointer */ + char *ps_cpuid; /* log cpuid */ size_t ps_len; /* length of buffered data */ }; #define PMCLOG_FD_NONE (-1) __BEGIN_DECLS void *pmclog_open(int _fd); int pmclog_feed(void *_cookie, char *_data, int _len); int pmclog_read(void *_cookie, struct pmclog_ev *_ev); void pmclog_close(void *_cookie); __END_DECLS #endif diff --git a/share/examples/hwpmc/overhead.c b/share/examples/hwpmc/overhead.c index 14834bd27c3d..372babee35d0 100644 --- a/share/examples/hwpmc/overhead.c +++ b/share/examples/hwpmc/overhead.c @@ -1,106 +1,106 @@ /*- * Copyright (c) 2014, Neville-Neil Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * * Author: George V. Neville-Neil * */ /* * Calculate the time overhead of starting, stopping, and recording * pmc counters. * * The only argument is a counter name, such as "instruction-retired" * which is CPU dependent and can be found with pmmcontrol(8) using * pmccontrol -L. * * The start, stop, read and write operations are timed using the * rdtsc() macro which reads the Time Stamp Counter on the CPU. */ #include #include #include #include #include #include int main(int argc, char **argv) { pmc_id_t pmcid; pmc_value_t read_value; pmc_value_t read_clear_value; uint64_t tsc1, write_cyc, start_cyc, read_cyc, stop_cyc; char *counter_name; if (argc != 2) err(EX_USAGE, "counter-name required"); counter_name = argv[1]; if (pmc_init() != 0) err(EX_OSERR, "hwpmc(4) not loaded, kldload or update your kernel"); - if (pmc_allocate(counter_name, PMC_MODE_SC, 0, 0, &pmcid) < 0) + if (pmc_allocate(counter_name, PMC_MODE_SC, 0, 0, &pmcid, 64*1024) < 0) err(EX_OSERR, "failed to allocate %s as a system counter in counting mode", counter_name); tsc1 = rdtsc(); if (pmc_write(pmcid, 0) < 0) err(EX_OSERR, "failed to zero counter %s", counter_name); write_cyc = rdtsc() - tsc1; tsc1 = rdtsc(); if (pmc_start(pmcid) < 0) err(EX_OSERR, "failed to start counter %s", counter_name); start_cyc = rdtsc() - tsc1; tsc1 = rdtsc(); if (pmc_read(pmcid, &read_value) < 0) err(EX_OSERR, "failed to read counter %s", counter_name); read_cyc = rdtsc() - tsc1; tsc1 = rdtsc(); if (pmc_stop(pmcid) < 0) err(EX_OSERR, "failed to stop counter %s", counter_name); stop_cyc = rdtsc() - tsc1; if (pmc_rw(pmcid, 0, &read_clear_value)) err(EX_OSERR, "failed to read and zero %s", counter_name); if (pmc_release(pmcid) < 0) err(EX_OSERR, "failed to release %s as a system counter in counting mode", counter_name); printf("Counter %s, read value %ld, read/clear value %ld\n", counter_name, read_value, read_clear_value); printf("Cycles to start: %ld\tstop: %ld\tread: %ld\twrite: %ld\n", start_cyc, stop_cyc, read_cyc, stop_cyc); return(0); } diff --git a/sys/dev/hwpmc/hwpmc_logging.c b/sys/dev/hwpmc/hwpmc_logging.c index 49996229480b..1f314518549a 100644 --- a/sys/dev/hwpmc/hwpmc_logging.c +++ b/sys/dev/hwpmc/hwpmc_logging.c @@ -1,1279 +1,1285 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * Copyright (c) 2018 Matthew Macy * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Logging code for hwpmc(4) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NUMA #define NDOMAINS vm_ndomains #define curdomain PCPU_GET(domain) #else #define NDOMAINS 1 #define curdomain 0 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) #define free_domain(addr, type) free(addr, type) #endif /* * Sysctl tunables */ SYSCTL_DECL(_kern_hwpmc); /* * kern.hwpmc.logbuffersize -- size of the per-cpu owner buffers. */ static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; #if (__FreeBSD_version < 1100000) TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size); #endif SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_RDTUN, &pmclog_buffer_size, 0, "size of log buffers in kilobytes"); /* * kern.hwpmc.nbuffer -- number of global log buffers */ static int pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; #if (__FreeBSD_version < 1100000) TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers_pcpu); #endif SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers_pcpu, CTLFLAG_RDTUN, &pmc_nlogbuffers_pcpu, 0, "number of log buffers per cpu"); /* * Global log buffer list and associated spin lock. */ static struct mtx pmc_kthread_mtx; /* sleep lock */ #define PMCLOG_INIT_BUFFER_DESCRIPTOR(D, buf, domain) do { \ (D)->plb_fence = ((char *) (buf)) + 1024*pmclog_buffer_size; \ (D)->plb_base = (D)->plb_ptr = ((char *) (buf)); \ (D)->plb_domain = domain; \ } while (0) #define PMCLOG_RESET_BUFFER_DESCRIPTOR(D) do { \ (D)->plb_ptr = (D)->plb_base; \ } while (0) /* * Log file record constructors. */ #define _PMCLOG_TO_HEADER(T,L) \ ((PMCLOG_HEADER_MAGIC << 24) | \ (PMCLOG_TYPE_ ## T << 16) | \ ((L) & 0xFFFF)) /* reserve LEN bytes of space and initialize the entry header */ #define _PMCLOG_RESERVE_SAFE(PO,TYPE,LEN,ACTION) do { \ uint32_t *_le; \ int _len = roundup((LEN), sizeof(uint32_t)); \ if ((_le = pmclog_reserve((PO), _len)) == NULL) { \ ACTION; \ } \ *_le = _PMCLOG_TO_HEADER(TYPE,_len); \ _le += 3 /* skip over timestamp */ /* reserve LEN bytes of space and initialize the entry header */ #define _PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do { \ uint32_t *_le; \ int _len = roundup((LEN), sizeof(uint32_t)); \ spinlock_enter(); \ if ((_le = pmclog_reserve((PO), _len)) == NULL) { \ spinlock_exit(); \ ACTION; \ } \ *_le = _PMCLOG_TO_HEADER(TYPE,_len); \ _le += 3 /* skip over timestamp */ #define PMCLOG_RESERVE_SAFE(P,T,L) _PMCLOG_RESERVE_SAFE(P,T,L,return) #define PMCLOG_RESERVE(P,T,L) _PMCLOG_RESERVE(P,T,L,return) #define PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L, \ error=ENOMEM;goto error) #define PMCLOG_EMIT32(V) do { *_le++ = (V); } while (0) #define PMCLOG_EMIT64(V) do { \ *_le++ = (uint32_t) ((V) & 0xFFFFFFFF); \ *_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF); \ } while (0) /* Emit a string. Caution: does NOT update _le, so needs to be last */ #define PMCLOG_EMITSTRING(S,L) do { bcopy((S), _le, (L)); } while (0) #define PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0) #define PMCLOG_DESPATCH_SAFE(PO) \ pmclog_release((PO)); \ } while (0) #define PMCLOG_DESPATCH_SCHED_LOCK(PO) \ pmclog_release_flags((PO), 0); \ } while (0) #define PMCLOG_DESPATCH(PO) \ pmclog_release((PO)); \ spinlock_exit(); \ } while (0) #define PMCLOG_DESPATCH_SYNC(PO) \ pmclog_schedule_io((PO), 1); \ spinlock_exit(); \ } while (0) /* * Assertions about the log file format. */ CTASSERT(sizeof(struct pmclog_callchain) == 8*4 + PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_closelog) == 4*4); CTASSERT(sizeof(struct pmclog_dropnotify) == 4*4); CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX + 4*4 + sizeof(uintfptr_t)); CTASSERT(offsetof(struct pmclog_map_in,pl_pathname) == 4*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_map_out) == 4*4 + 2*sizeof(uintfptr_t)); -CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4); +CTASSERT(sizeof(struct pmclog_pmcallocate) == 8*4); CTASSERT(sizeof(struct pmclog_pmcattach) == 6*4 + PATH_MAX); CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 6*4); CTASSERT(sizeof(struct pmclog_pmcdetach) == 6*4); CTASSERT(sizeof(struct pmclog_proccsw) == 6*4 + 8); CTASSERT(sizeof(struct pmclog_procexec) == 6*4 + PATH_MAX + sizeof(uintfptr_t)); CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 6*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_procexit) == 6*4 + 8); CTASSERT(sizeof(struct pmclog_procfork) == 6*4); CTASSERT(sizeof(struct pmclog_sysexit) == 4*4); CTASSERT(sizeof(struct pmclog_userdata) == 4*4); /* * Log buffer structure */ struct pmclog_buffer { TAILQ_ENTRY(pmclog_buffer) plb_next; char *plb_base; char *plb_ptr; char *plb_fence; uint16_t plb_domain; } __aligned(CACHE_LINE_SIZE); /* * Prototypes */ static int pmclog_get_buffer(struct pmc_owner *po); static void pmclog_loop(void *arg); static void pmclog_release(struct pmc_owner *po); static uint32_t *pmclog_reserve(struct pmc_owner *po, int length); static void pmclog_schedule_io(struct pmc_owner *po, int wakeup); static void pmclog_schedule_all(struct pmc_owner *po, int force); static void pmclog_stop_kthread(struct pmc_owner *po); /* * Helper functions */ static inline void pmc_plb_rele_unlocked(struct pmclog_buffer *plb) { TAILQ_INSERT_HEAD(&pmc_dom_hdrs[plb->plb_domain]->pdbh_head, plb, plb_next); } static inline void pmc_plb_rele(struct pmclog_buffer *plb) { mtx_lock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); pmc_plb_rele_unlocked(plb); mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); } /* * Get a log buffer */ static int pmclog_get_buffer(struct pmc_owner *po) { struct pmclog_buffer *plb; int domain; KASSERT(po->po_curbuf[curcpu] == NULL, ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po)); domain = curdomain; MPASS(pmc_dom_hdrs[domain]); mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); mtx_unlock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb); #ifdef HWPMC_DEBUG if (plb) KASSERT(plb->plb_ptr == plb->plb_base && plb->plb_base < plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p " "base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); #endif po->po_curbuf[curcpu] = plb; /* update stats */ counter_u64_add(pmc_stats.pm_buffer_requests, 1); if (plb == NULL) counter_u64_add(pmc_stats.pm_buffer_requests_failed, 1); return (plb ? 0 : ENOMEM); } struct pmclog_proc_init_args { struct proc *kthr; struct pmc_owner *po; bool exit; bool acted; }; int pmclog_proc_create(struct thread *td, void **handlep) { struct pmclog_proc_init_args *ia; int error; ia = malloc(sizeof(*ia), M_TEMP, M_WAITOK | M_ZERO); error = kproc_create(pmclog_loop, ia, &ia->kthr, RFHIGHPID, 0, "hwpmc: proc(%d)", td->td_proc->p_pid); if (error == 0) *handlep = ia; return (error); } void pmclog_proc_ignite(void *handle, struct pmc_owner *po) { struct pmclog_proc_init_args *ia; ia = handle; mtx_lock(&pmc_kthread_mtx); MPASS(!ia->acted); MPASS(ia->po == NULL); MPASS(!ia->exit); MPASS(ia->kthr != NULL); if (po == NULL) { ia->exit = true; } else { ia->po = po; KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread (%p) already present", __LINE__, po, po->po_kthread)); po->po_kthread = ia->kthr; } wakeup(ia); while (!ia->acted) msleep(ia, &pmc_kthread_mtx, PWAIT, "pmclogw", 0); mtx_unlock(&pmc_kthread_mtx); free(ia, M_TEMP); } /* * Log handler loop. * * This function is executed by each pmc owner's helper thread. */ static void pmclog_loop(void *arg) { struct pmclog_proc_init_args *ia; struct pmc_owner *po; struct pmclog_buffer *lb; struct proc *p; struct ucred *ownercred; struct ucred *mycred; struct thread *td; sigset_t unb; struct uio auio; struct iovec aiov; size_t nbytes; int error; td = curthread; SIGEMPTYSET(unb); SIGADDSET(unb, SIGHUP); (void)kern_sigprocmask(td, SIG_UNBLOCK, &unb, NULL, 0); ia = arg; MPASS(ia->kthr == curproc); MPASS(!ia->acted); mtx_lock(&pmc_kthread_mtx); while (ia->po == NULL && !ia->exit) msleep(ia, &pmc_kthread_mtx, PWAIT, "pmclogi", 0); if (ia->exit) { ia->acted = true; wakeup(ia); mtx_unlock(&pmc_kthread_mtx); kproc_exit(0); } MPASS(ia->po != NULL); po = ia->po; ia->acted = true; wakeup(ia); mtx_unlock(&pmc_kthread_mtx); ia = NULL; p = po->po_owner; mycred = td->td_ucred; PROC_LOCK(p); ownercred = crhold(p->p_ucred); PROC_UNLOCK(p); PMCDBG2(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread); KASSERT(po->po_kthread == curthread->td_proc, ("[pmclog,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__, po, po->po_kthread, curthread->td_proc)); lb = NULL; /* * Loop waiting for I/O requests to be added to the owner * struct's queue. The loop is exited when the log file * is deconfigured. */ mtx_lock(&pmc_kthread_mtx); for (;;) { /* check if we've been asked to exit */ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) break; if (lb == NULL) { /* look for a fresh buffer to write */ mtx_lock_spin(&po->po_mtx); if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) { mtx_unlock_spin(&po->po_mtx); /* No more buffers and shutdown required. */ if (po->po_flags & PMC_PO_SHUTDOWN) break; (void) msleep(po, &pmc_kthread_mtx, PWAIT, "pmcloop", 250); continue; } TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); mtx_unlock_spin(&po->po_mtx); } mtx_unlock(&pmc_kthread_mtx); /* process the request */ PMCDBG3(LOG,WRI,2, "po=%p base=%p ptr=%p", po, lb->plb_base, lb->plb_ptr); /* change our thread's credentials before issuing the I/O */ aiov.iov_base = lb->plb_base; aiov.iov_len = nbytes = lb->plb_ptr - lb->plb_base; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; /* switch thread credentials -- see kern_ktrace.c */ td->td_ucred = ownercred; error = fo_write(po->po_file, &auio, ownercred, 0, td); td->td_ucred = mycred; if (error) { /* XXX some errors are recoverable */ /* send a SIGIO to the owner and exit */ PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); mtx_lock(&pmc_kthread_mtx); po->po_error = error; /* save for flush log */ PMCDBG2(LOG,WRI,2, "po=%p error=%d", po, error); break; } mtx_lock(&pmc_kthread_mtx); /* put the used buffer back into the global pool */ PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); lb = NULL; } wakeup_one(po->po_kthread); po->po_kthread = NULL; mtx_unlock(&pmc_kthread_mtx); /* return the current I/O buffer to the global pool */ if (lb) { PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); } /* * Exit this thread, signalling the waiter */ crfree(ownercred); kproc_exit(0); } /* * Release and log entry and schedule an I/O if needed. */ static void pmclog_release_flags(struct pmc_owner *po, int wakeup) { struct pmclog_buffer *plb; plb = po->po_curbuf[curcpu]; KASSERT(plb->plb_ptr >= plb->plb_base, ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, po, plb->plb_ptr, plb->plb_base)); KASSERT(plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, po, plb->plb_ptr, plb->plb_fence)); /* schedule an I/O if we've filled a buffer */ if (plb->plb_ptr >= plb->plb_fence) pmclog_schedule_io(po, wakeup); PMCDBG1(LOG,REL,1, "po=%p", po); } static void pmclog_release(struct pmc_owner *po) { pmclog_release_flags(po, 1); } /* * Attempt to reserve 'length' bytes of space in an owner's log * buffer. The function returns a pointer to 'length' bytes of space * if there was enough space or returns NULL if no space was * available. Non-null returns do so with the po mutex locked. The * caller must invoke pmclog_release() on the pmc owner structure * when done. */ static uint32_t * pmclog_reserve(struct pmc_owner *po, int length) { uintptr_t newptr, oldptr; uint32_t *lh; struct timespec ts; struct pmclog_buffer *plb, **pplb; PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length); KASSERT(length % sizeof(uint32_t) == 0, ("[pmclog,%d] length not a multiple of word size", __LINE__)); /* No more data when shutdown in progress. */ if (po->po_flags & PMC_PO_SHUTDOWN) return (NULL); pplb = &po->po_curbuf[curcpu]; if (*pplb == NULL && pmclog_get_buffer(po) != 0) goto fail; KASSERT(*pplb != NULL, ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); plb = *pplb; KASSERT(plb->plb_ptr >= plb->plb_base && plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); oldptr = (uintptr_t) plb->plb_ptr; newptr = oldptr + length; KASSERT(oldptr != (uintptr_t) NULL, ("[pmclog,%d] po=%p Null log buffer pointer", __LINE__, po)); /* * If we have space in the current buffer, return a pointer to * available space with the PO structure locked. */ if (newptr <= (uintptr_t) plb->plb_fence) { plb->plb_ptr = (char *) newptr; goto done; } /* * Otherwise, schedule the current buffer for output and get a * fresh buffer. */ pmclog_schedule_io(po, 0); if (pmclog_get_buffer(po) != 0) goto fail; plb = *pplb; KASSERT(plb != NULL, ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); KASSERT(plb->plb_ptr != NULL, ("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__)); KASSERT(plb->plb_ptr == plb->plb_base && plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); oldptr = (uintptr_t) plb->plb_ptr; done: lh = (uint32_t *) oldptr; lh++; /* skip header */ getnanotime(&ts); /* fill in the timestamp */ *lh++ = ts.tv_sec & 0xFFFFFFFF; *lh++ = ts.tv_nsec & 0xFFFFFFF; return ((uint32_t *) oldptr); fail: return (NULL); } /* * Schedule an I/O. * * Transfer the current buffer to the helper kthread. */ static void pmclog_schedule_io(struct pmc_owner *po, int wakeup) { struct pmclog_buffer *plb; plb = po->po_curbuf[curcpu]; po->po_curbuf[curcpu] = NULL; KASSERT(plb != NULL, ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po)); KASSERT(plb->plb_ptr >= plb->plb_base, ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, po, plb->plb_ptr, plb->plb_base)); KASSERT(plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, po, plb->plb_ptr, plb->plb_fence)); PMCDBG1(LOG,SIO, 1, "po=%p", po); /* * Add the current buffer to the tail of the buffer list and * wakeup the helper. */ mtx_lock_spin(&po->po_mtx); TAILQ_INSERT_TAIL(&po->po_logbuffers, plb, plb_next); mtx_unlock_spin(&po->po_mtx); if (wakeup) wakeup_one(po); } /* * Stop the helper kthread. */ static void pmclog_stop_kthread(struct pmc_owner *po) { mtx_lock(&pmc_kthread_mtx); po->po_flags &= ~PMC_PO_OWNS_LOGFILE; if (po->po_kthread != NULL) { PROC_LOCK(po->po_kthread); kern_psignal(po->po_kthread, SIGHUP); PROC_UNLOCK(po->po_kthread); } wakeup_one(po); while (po->po_kthread) msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmckstp", 0); mtx_unlock(&pmc_kthread_mtx); } /* * Public functions */ /* * Configure a log file for pmc owner 'po'. * * Parameter 'logfd' is a file handle referencing an open file in the * owner process. This file needs to have been opened for writing. */ int pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd) { struct proc *p; int error; sx_assert(&pmc_sx, SA_XLOCKED); PMCDBG2(LOG,CFG,1, "config po=%p logfd=%d", po, logfd); p = po->po_owner; /* return EBUSY if a log file was already present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) return (EBUSY); KASSERT(po->po_file == NULL, ("[pmclog,%d] po=%p file (%p) already present", __LINE__, po, po->po_file)); /* get a reference to the file state */ error = fget_write(curthread, logfd, &cap_write_rights, &po->po_file); if (error) goto error; /* mark process as owning a log file */ po->po_flags |= PMC_PO_OWNS_LOGFILE; /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); /* create a log initialization entry */ PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE, sizeof(struct pmclog_initialize)); PMCLOG_EMIT32(PMC_VERSION); PMCLOG_EMIT32(md->pmd_cputype); PMCLOG_EMITSTRING(pmc_cpuid, PMC_CPUID_LEN); PMCLOG_DESPATCH_SYNC(po); return (0); error: KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not " "stopped", __LINE__, po)); if (po->po_file) (void) fdrop(po->po_file, curthread); po->po_file = NULL; /* clear file and error state */ po->po_error = 0; po->po_flags &= ~PMC_PO_OWNS_LOGFILE; return (error); } /* * De-configure a log file. This will throw away any buffers queued * for this owner process. */ int pmclog_deconfigure_log(struct pmc_owner *po) { int error; struct pmclog_buffer *lb; PMCDBG1(LOG,CFG,1, "de-config po=%p", po); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EINVAL); KASSERT(po->po_sscount == 0, ("[pmclog,%d] po=%p still owning SS PMCs", __LINE__, po)); KASSERT(po->po_file != NULL, ("[pmclog,%d] po=%p no log file", __LINE__, po)); /* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */ pmclog_stop_kthread(po); KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not stopped", __LINE__, po)); /* return all queued log buffers to the global pool */ while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) { TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); } for (int i = 0; i < mp_ncpus; i++) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); /* return the 'current' buffer to the global pool */ if ((lb = po->po_curbuf[curcpu]) != NULL) { PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); } } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); /* drop a reference to the fd */ if (po->po_file != NULL) { error = fdrop(po->po_file, curthread); po->po_file = NULL; } else error = 0; po->po_error = 0; return (error); } /* * Flush a process' log buffer. */ int pmclog_flush(struct pmc_owner *po, int force) { int error; PMCDBG1(LOG,FLS,1, "po=%p", po); /* * If there is a pending error recorded by the logger thread, * return that. */ if (po->po_error) return (po->po_error); error = 0; /* * Check that we do have an active log file. */ mtx_lock(&pmc_kthread_mtx); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; goto error; } pmclog_schedule_all(po, force); error: mtx_unlock(&pmc_kthread_mtx); return (error); } static void pmclog_schedule_one_cond(struct pmc_owner *po, int force) { struct pmclog_buffer *plb; int cpu; spinlock_enter(); cpu = curcpu; /* tell hardclock not to run again */ if (PMC_CPU_HAS_SAMPLES(cpu)) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (force) pmc_flush_samples(cpu); plb = po->po_curbuf[cpu]; if (plb && plb->plb_ptr != plb->plb_base) pmclog_schedule_io(po, 1); spinlock_exit(); } static void pmclog_schedule_all(struct pmc_owner *po, int force) { /* * Schedule the current buffer if any and not empty. */ for (int i = 0; i < mp_ncpus; i++) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); pmclog_schedule_one_cond(po, force); } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } int pmclog_close(struct pmc_owner *po) { PMCDBG1(LOG,CLO,1, "po=%p", po); pmclog_process_closelog(po); mtx_lock(&pmc_kthread_mtx); /* * Initiate shutdown: no new data queued, * thread will close file on last block. */ po->po_flags |= PMC_PO_SHUTDOWN; /* give time for all to see */ DELAY(50); /* * Schedule the current buffer. */ pmclog_schedule_all(po, 0); wakeup_one(po); mtx_unlock(&pmc_kthread_mtx); return (0); } void pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps) { int n, recordlen; uint32_t flags; struct pmc_owner *po; PMCDBG3(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid, ps->ps_nsamples); recordlen = offsetof(struct pmclog_callchain, pl_pc) + ps->ps_nsamples * sizeof(uintfptr_t); po = pm->pm_owner; flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags); PMCLOG_RESERVE_SAFE(po, CALLCHAIN, recordlen); PMCLOG_EMIT32(ps->ps_pid); PMCLOG_EMIT32(ps->ps_tid); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(flags); /* unused for now */ PMCLOG_EMIT32(0); for (n = 0; n < ps->ps_nsamples; n++) PMCLOG_EMITADDR(ps->ps_pc[n]); PMCLOG_DESPATCH_SAFE(po); } void pmclog_process_closelog(struct pmc_owner *po) { PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog)); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_dropnotify(struct pmc_owner *po) { PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify)); PMCLOG_DESPATCH(po); } void pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start, const char *path) { int pathlen, recordlen; KASSERT(path != NULL, ("[pmclog,%d] map-in, null path", __LINE__)); pathlen = strlen(path) + 1; /* #bytes for path name */ recordlen = offsetof(struct pmclog_map_in, pl_pathname) + pathlen; PMCLOG_RESERVE(po, MAP_IN, recordlen); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(start); PMCLOG_EMITSTRING(path,pathlen); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start, uintfptr_t end) { KASSERT(start <= end, ("[pmclog,%d] start > end", __LINE__)); PMCLOG_RESERVE(po, MAP_OUT, sizeof(struct pmclog_map_out)); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(start); PMCLOG_EMITADDR(end); PMCLOG_DESPATCH(po); } void pmclog_process_pmcallocate(struct pmc *pm) { struct pmc_owner *po; struct pmc_soft *ps; po = pm->pm_owner; PMCDBG1(LOG,ALL,1, "pm=%p", pm); if (PMC_TO_CLASS(pm) == PMC_CLASS_SOFT) { PMCLOG_RESERVE(po, PMCALLOCATEDYN, sizeof(struct pmclog_pmcallocatedyn)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pm->pm_event); PMCLOG_EMIT32(pm->pm_flags); + PMCLOG_EMIT64(pm->pm_sc.pm_reloadcount); ps = pmc_soft_ev_acquire(pm->pm_event); if (ps != NULL) PMCLOG_EMITSTRING(ps->ps_ev.pm_ev_name,PMC_NAME_MAX); else PMCLOG_EMITNULLSTRING(PMC_NAME_MAX); pmc_soft_ev_release(ps); PMCLOG_DESPATCH_SYNC(po); } else { PMCLOG_RESERVE(po, PMCALLOCATE, sizeof(struct pmclog_pmcallocate)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pm->pm_event); PMCLOG_EMIT32(pm->pm_flags); + PMCLOG_EMIT64(pm->pm_sc.pm_reloadcount); PMCLOG_DESPATCH_SYNC(po); } } void pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path) { int pathlen, recordlen; struct pmc_owner *po; PMCDBG2(LOG,ATT,1,"pm=%p pid=%d", pm, pid); po = pm->pm_owner; pathlen = strlen(path) + 1; /* #bytes for the string */ recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen; PMCLOG_RESERVE(po, PMCATTACH, recordlen); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pid); PMCLOG_EMIT32(0); PMCLOG_EMITSTRING(path, pathlen); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_pmcdetach(struct pmc *pm, pid_t pid) { struct pmc_owner *po; PMCDBG2(LOG,ATT,1,"!pm=%p pid=%d", pm, pid); po = pm->pm_owner; PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pid); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_proccreate(struct pmc_owner *po, struct proc *p, int sync) { if (sync) { PMCLOG_RESERVE(po, PROC_CREATE, sizeof(struct pmclog_proccreate)); PMCLOG_EMIT32(p->p_pid); + PMCLOG_EMIT32(p->p_flag); + PMCLOG_EMIT32(0); PMCLOG_EMITSTRING(p->p_comm, MAXCOMLEN+1); PMCLOG_DESPATCH_SYNC(po); } else { PMCLOG_RESERVE(po, PROC_CREATE, sizeof(struct pmclog_proccreate)); PMCLOG_EMIT32(p->p_pid); + PMCLOG_EMIT32(p->p_flag); + PMCLOG_EMIT32(0); PMCLOG_EMITSTRING(p->p_comm, MAXCOMLEN+1); PMCLOG_DESPATCH(po); } } /* * Log a context switch event to the log file. */ void pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v, struct thread *td) { struct pmc_owner *po; KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW, ("[pmclog,%d] log-process-csw called gratuitously", __LINE__)); PMCDBG3(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid, v); po = pm->pm_owner; PMCLOG_RESERVE_SAFE(po, PROCCSW, sizeof(struct pmclog_proccsw)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT64(v); PMCLOG_EMIT32(pp->pp_proc->p_pid); PMCLOG_EMIT32(td->td_tid); PMCLOG_DESPATCH_SCHED_LOCK(po); } void pmclog_process_procexec(struct pmc_owner *po, pmc_id_t pmid, pid_t pid, uintfptr_t startaddr, char *path) { int pathlen, recordlen; PMCDBG3(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path); pathlen = strlen(path) + 1; /* #bytes for the path */ recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen; PMCLOG_RESERVE(po, PROCEXEC, recordlen); PMCLOG_EMIT32(pid); PMCLOG_EMIT32(pmid); PMCLOG_EMIT32(0); PMCLOG_EMITADDR(startaddr); PMCLOG_EMITSTRING(path,pathlen); PMCLOG_DESPATCH_SYNC(po); } /* * Log a process exit event (and accumulated pmc value) to the log file. */ void pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_owner *po; ri = PMC_TO_ROWINDEX(pm); PMCDBG3(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid, pp->pp_pmcs[ri].pp_pmcval); po = pm->pm_owner; PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pp->pp_proc->p_pid); PMCLOG_EMIT32(0); PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval); PMCLOG_DESPATCH(po); } /* * Log a fork event. */ void pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid) { PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork)); PMCLOG_EMIT32(oldpid); PMCLOG_EMIT32(newpid); PMCLOG_DESPATCH(po); } /* * Log a process exit event of the form suitable for system-wide PMCs. */ void pmclog_process_sysexit(struct pmc_owner *po, pid_t pid) { PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit)); PMCLOG_EMIT32(pid); PMCLOG_DESPATCH(po); } void pmclog_process_threadcreate(struct pmc_owner *po, struct thread *td, int sync) { struct proc *p; p = td->td_proc; if (sync) { PMCLOG_RESERVE(po, THR_CREATE, sizeof(struct pmclog_threadcreate)); PMCLOG_EMIT32(td->td_tid); PMCLOG_EMIT32(p->p_pid); - PMCLOG_EMIT32(0); + PMCLOG_EMIT32(p->p_flag); PMCLOG_EMITSTRING(td->td_name, MAXCOMLEN+1); PMCLOG_DESPATCH_SYNC(po); } else { PMCLOG_RESERVE(po, THR_CREATE, sizeof(struct pmclog_threadcreate)); PMCLOG_EMIT32(td->td_tid); PMCLOG_EMIT32(p->p_pid); - PMCLOG_EMIT32(0); + PMCLOG_EMIT32(p->p_flag); PMCLOG_EMITSTRING(td->td_name, MAXCOMLEN+1); PMCLOG_DESPATCH(po); } } void pmclog_process_threadexit(struct pmc_owner *po, struct thread *td) { PMCLOG_RESERVE(po, THR_EXIT, sizeof(struct pmclog_threadexit)); PMCLOG_EMIT32(td->td_tid); PMCLOG_DESPATCH(po); } /* * Write a user log entry. */ int pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl) { int error; PMCDBG2(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata); error = 0; PMCLOG_RESERVE_WITH_ERROR(po, USERDATA, sizeof(struct pmclog_userdata)); PMCLOG_EMIT32(wl->pm_userdata); PMCLOG_DESPATCH(po); error: return (error); } /* * Initialization. * * Create a pool of log buffers and initialize mutexes. */ void pmclog_initialize() { int domain; struct pmclog_buffer *plb; if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) { (void) printf("hwpmc: tunable logbuffersize=%d must be " "greater than zero and less than or equal to 16MB.\n", pmclog_buffer_size); pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; } if (pmc_nlogbuffers_pcpu <= 0) { (void) printf("hwpmc: tunable nlogbuffers=%d must be greater " "than zero.\n", pmc_nlogbuffers_pcpu); pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; } if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) { (void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n", pmc_nlogbuffers_pcpu*pmclog_buffer_size); pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; } for (domain = 0; domain < NDOMAINS; domain++) { int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus; int total = ncpus*pmc_nlogbuffers_pcpu; plb = malloc_domain(sizeof(struct pmclog_buffer)*total, M_PMC, domain, M_WAITOK|M_ZERO); pmc_dom_hdrs[domain]->pdbh_plbs = plb; for (int i = 0; i < total; i++, plb++) { void *buf; buf = malloc_domain(1024 * pmclog_buffer_size, M_PMC, domain, M_WAITOK|M_ZERO); PMCLOG_INIT_BUFFER_DESCRIPTOR(plb, buf, domain); pmc_plb_rele_unlocked(plb); } } mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF); } /* * Shutdown logging. * * Destroy mutexes and release memory back the to free pool. */ void pmclog_shutdown() { struct pmclog_buffer *plb; int domain; mtx_destroy(&pmc_kthread_mtx); for (domain = 0; domain < NDOMAINS; domain++) { while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) { TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); free(plb->plb_base, M_PMC); } free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC); } } diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 191f2554ee60..cb09592a61d1 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1,5960 +1,5966 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * Copyright (c) 2018 Matthew Macy * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needs to be after */ #include #include #include #include #include #include #include #include "hwpmc_soft.h" #ifdef NUMA #define NDOMAINS vm_ndomains #else #define NDOMAINS 1 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) #define free_domain(addr, type) free(addr, type) #endif /* * Types */ enum pmc_flags { PMC_FLAG_NONE = 0x00, /* do nothing */ PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ PMC_FLAG_NOWAIT = 0x04, /* do not wait for mallocs */ }; /* * The offset in sysent where the syscall is allocated. */ static int pmc_syscall_num = NO_SYSCALL; struct pmc_cpu **pmc_pcpu; /* per-cpu state */ pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] struct mtx_pool *pmc_mtxpool; static int *pmc_pmcdisp; /* PMC row dispositions */ #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) #define PMC_MARK_ROW_FREE(R) do { \ pmc_pmcdisp[(R)] = 0; \ } while (0) #define PMC_MARK_ROW_STANDALONE(R) do { \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ ("[pmc,%d] row disposition error", __LINE__)); \ } while (0) #define PMC_UNMARK_ROW_STANDALONE(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) #define PMC_MARK_ROW_THREAD(R) do { \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ } while (0) #define PMC_UNMARK_ROW_THREAD(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) /* various event handlers */ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, pmc_kld_unload_tag; /* Module statistics */ struct pmc_driverstats pmc_stats; /* Machine/processor dependent operations */ static struct pmc_mdep *md; /* * Hash tables mapping owner processes and target threads to PMCs. */ struct mtx pmc_processhash_mtx; /* spin mutex */ static u_long pmc_processhashmask; static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; /* * Hash table of PMC owner descriptors. This table is protected by * the shared PMC "sx" lock. */ static u_long pmc_ownerhashmask; static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; /* * List of PMC owners with system-wide sampling PMCs. */ static CK_LIST_HEAD(, pmc_owner) pmc_ss_owners; /* * List of free thread entries. This is protected by the spin * mutex. */ static struct mtx pmc_threadfreelist_mtx; /* spin mutex */ static LIST_HEAD(, pmc_thread) pmc_threadfreelist; static int pmc_threadfreelist_entries=0; #define THREADENTRY_SIZE \ (sizeof(struct pmc_thread) + (md->pmd_npmc * sizeof(struct pmc_threadpmcstate))) /* * Task to free thread descriptors */ static struct grouptask free_gtask; /* * A map of row indices to classdep structures. */ static struct pmc_classdep **pmc_rowindex_to_classdep; /* * Prototypes */ #ifdef HWPMC_DEBUG static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); static int pmc_add_sample(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace); static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp); static int pmc_attach_process(struct proc *p, struct pmc *pm); static struct pmc *pmc_allocate_pmc_descriptor(void); static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags); static void pmc_destroy_owner_descriptor(struct pmc_owner *po); static void pmc_destroy_pmc_descriptor(struct pmc *pm); static void pmc_destroy_process_descriptor(struct pmc_process *pp); static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmc); static struct pmc_process *pmc_find_process_descriptor(struct proc *p, uint32_t mode); static struct pmc_thread *pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td, uint32_t mode); static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); static void pmc_log_all_process_mappings(struct pmc_owner *po); static void pmc_log_kernel_mappings(struct pmc *pm); static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); static void pmc_process_exit(void *arg, struct proc *p); static void pmc_process_fork(void *arg, struct proc *p1, struct proc *p2, int n); static void pmc_process_samples(int cpu, int soft); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_process_thread_add(struct thread *td); static void pmc_process_thread_delete(struct thread *td); static void pmc_process_thread_userret(struct thread *td); static void pmc_remove_owner(struct pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static void pmc_restore_cpu_binding(struct pmc_binding *pb); static void pmc_save_cpu_binding(struct pmc_binding *pb); static void pmc_select_cpu(int cpu); static int pmc_start(struct pmc *pm); static int pmc_stop(struct pmc *pm); static int pmc_syscall_handler(struct thread *td, void *syscall_args); static struct pmc_thread *pmc_thread_descriptor_pool_alloc(void); static void pmc_thread_descriptor_pool_drain(void); static void pmc_thread_descriptor_pool_free(struct pmc_thread *pt); static void pmc_unlink_target_process(struct pmc *pmc, struct pmc_process *pp); static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); static void pmc_post_callchain_callback(void); static void pmc_process_threadcreate(struct thread *td); static void pmc_process_threadexit(struct thread *td); static void pmc_process_proccreate(struct proc *p); static void pmc_process_allproc(struct pmc *pm); /* * Kernel tunables and sysctl(8) interface. */ SYSCTL_DECL(_kern_hwpmc); SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW, 0, "HWPMC stats"); /* Stats. */ SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW, &pmc_stats.pm_intr_ignored, "# of interrupts ignored"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW, &pmc_stats.pm_intr_processed, "# of interrupts processed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW, &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW, &pmc_stats.pm_syscalls, "# of syscalls"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW, &pmc_stats.pm_syscall_errors, "# of syscall_errors"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW, &pmc_stats.pm_buffer_requests, "# of buffer requests"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW, &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW, &pmc_stats.pm_log_sweeps, "# of ?"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW, &pmc_stats.pm_merges, "# of times kernel stack was found for user trace"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW, &pmc_stats.pm_overwrites, "# of times a sample was overwritten before being logged"); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, &pmc_callchaindepth, 0, "depth of call chain records"); char pmc_cpuid[64]; SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD, pmc_cpuid, 0, "cpu version string"); #ifdef HWPMC_DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr)); SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); #endif /* * kern.hwpmc.hashrows -- determines the number of rows in the * of the hash table used to look up threads */ static int pmc_hashsize = PMC_HASH_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN, &pmc_hashsize, 0, "rows in hash tables"); /* * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN, &pmc_nsamples, 0, "number of PC samples per CPU"); /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. */ static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN, &pmc_mtxpool_size, 0, "size of spin mutex pool"); /* * kern.hwpmc.threadfreelist_entries -- number of free entries */ SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_entries, CTLFLAG_RD, &pmc_threadfreelist_entries, 0, "number of avalable thread entries"); /* * kern.hwpmc.threadfreelist_max -- maximum number of free entries */ static int pmc_threadfreelist_max = PMC_THREADLIST_MAX; SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_max, CTLFLAG_RW, &pmc_threadfreelist_max, 0, "maximum number of available thread entries before freeing some"); /* * security.bsd.unprivileged_syspmcs -- allow non-root processes to * allocate system-wide PMCs. * * Allowing unprivileged processes to allocate system PMCs is convenient * if system-wide measurements need to be taken concurrently with other * per-process measurements. This feature is turned off by default. */ static int pmc_unprivileged_syspmcs = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN, &pmc_unprivileged_syspmcs, 0, "allow unprivileged process to allocate system PMCs"); /* * Hash function. Discard the lower 2 bits of the pointer since * these are always zero for our uses. The hash multiplier is * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). */ #if LONG_BIT == 64 #define _PMC_HM 11400714819323198486u #elif LONG_BIT == 32 #define _PMC_HM 2654435769u #else #error Must know the size of 'long' to compile #endif #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) /* * Syscall structures */ /* The `sysent' for the new syscall */ static struct sysent pmc_sysent = { .sy_narg = 2, .sy_call = pmc_syscall_handler, }; static struct syscall_module_data pmc_syscall_mod = { .chainevh = load, .chainarg = NULL, .offset = &pmc_syscall_num, .new_sysent = &pmc_sysent, .old_sysent = { .sy_narg = 0, .sy_call = NULL }, .flags = SY_THR_STATIC_KLD, }; static moduledata_t pmc_mod = { .name = PMC_MODULE_NAME, .evhand = syscall_module_handler, .priv = &pmc_syscall_mod, }; #ifdef EARLY_AP_STARTUP DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SYSCALLS, SI_ORDER_ANY); #else DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); #endif MODULE_VERSION(pmc, PMC_VERSION); #ifdef HWPMC_DEBUG enum pmc_dbgparse_state { PMCDS_WS, /* in whitespace */ PMCDS_MAJOR, /* seen a major keyword */ PMCDS_MINOR }; static int pmc_debugflags_parse(char *newstr, char *fence) { char c, *p, *q; struct pmc_debugflags *tmpflags; int error, found, *newbits, tmp; size_t kwlen; tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO); p = newstr; error = 0; for (; p < fence && (c = *p); p++) { /* skip white space */ if (c == ' ' || c == '\t') continue; /* look for a keyword followed by "=" */ for (q = p; p < fence && (c = *p) && c != '='; p++) ; if (c != '=') { error = EINVAL; goto done; } kwlen = p - q; newbits = NULL; /* lookup flag group name */ #define DBG_SET_FLAG_MAJ(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ newbits = &tmpflags->pdb_ ## F; DBG_SET_FLAG_MAJ("cpu", CPU); DBG_SET_FLAG_MAJ("csw", CSW); DBG_SET_FLAG_MAJ("logging", LOG); DBG_SET_FLAG_MAJ("module", MOD); DBG_SET_FLAG_MAJ("md", MDP); DBG_SET_FLAG_MAJ("owner", OWN); DBG_SET_FLAG_MAJ("pmc", PMC); DBG_SET_FLAG_MAJ("process", PRC); DBG_SET_FLAG_MAJ("sampling", SAM); if (newbits == NULL) { error = EINVAL; goto done; } p++; /* skip the '=' */ /* Now parse the individual flags */ tmp = 0; newflag: for (q = p; p < fence && (c = *p); p++) if (c == ' ' || c == '\t' || c == ',') break; /* p == fence or c == ws or c == "," or c == 0 */ if ((kwlen = p - q) == 0) { *newbits = tmp; continue; } found = 0; #define DBG_SET_FLAG_MIN(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) /* a '*' denotes all possible flags in the group */ if (kwlen == 1 && *q == '*') tmp = found = ~0; /* look for individual flag names */ DBG_SET_FLAG_MIN("allocaterow", ALR); DBG_SET_FLAG_MIN("allocate", ALL); DBG_SET_FLAG_MIN("attach", ATT); DBG_SET_FLAG_MIN("bind", BND); DBG_SET_FLAG_MIN("config", CFG); DBG_SET_FLAG_MIN("exec", EXC); DBG_SET_FLAG_MIN("exit", EXT); DBG_SET_FLAG_MIN("find", FND); DBG_SET_FLAG_MIN("flush", FLS); DBG_SET_FLAG_MIN("fork", FRK); DBG_SET_FLAG_MIN("getbuf", GTB); DBG_SET_FLAG_MIN("hook", PMH); DBG_SET_FLAG_MIN("init", INI); DBG_SET_FLAG_MIN("intr", INT); DBG_SET_FLAG_MIN("linktarget", TLK); DBG_SET_FLAG_MIN("mayberemove", OMR); DBG_SET_FLAG_MIN("ops", OPS); DBG_SET_FLAG_MIN("read", REA); DBG_SET_FLAG_MIN("register", REG); DBG_SET_FLAG_MIN("release", REL); DBG_SET_FLAG_MIN("remove", ORM); DBG_SET_FLAG_MIN("sample", SAM); DBG_SET_FLAG_MIN("scheduleio", SIO); DBG_SET_FLAG_MIN("select", SEL); DBG_SET_FLAG_MIN("signal", SIG); DBG_SET_FLAG_MIN("swi", SWI); DBG_SET_FLAG_MIN("swo", SWO); DBG_SET_FLAG_MIN("start", STA); DBG_SET_FLAG_MIN("stop", STO); DBG_SET_FLAG_MIN("syscall", PMS); DBG_SET_FLAG_MIN("unlinktarget", TUL); DBG_SET_FLAG_MIN("write", WRI); if (found == 0) { /* unrecognized flag name */ error = EINVAL; goto done; } if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ *newbits = tmp; continue; } p++; goto newflag; } /* save the new flag set */ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); done: free(tmpflags, M_PMC); return error; } static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) { char *fence, *newstr; int error; unsigned int n; (void) arg1; (void) arg2; /* unused parameters */ n = sizeof(pmc_debugstr); newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO); (void) strlcpy(newstr, pmc_debugstr, n); error = sysctl_handle_string(oidp, newstr, n, req); /* if there is a new string, parse and copy it */ if (error == 0 && req->newptr != NULL) { fence = newstr + (n < req->newlen ? n : req->newlen + 1); if ((error = pmc_debugflags_parse(newstr, fence)) == 0) (void) strlcpy(pmc_debugstr, newstr, sizeof(pmc_debugstr)); } free(newstr, M_PMC); return error; } #endif /* * Map a row index to a classdep structure and return the adjusted row * index for the PMC class index. */ static struct pmc_classdep * pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri) { struct pmc_classdep *pcd; (void) md; KASSERT(ri >= 0 && ri < md->pmd_npmc, ("[pmc,%d] illegal row-index %d", __LINE__, ri)); pcd = pmc_rowindex_to_classdep[ri]; KASSERT(pcd != NULL, ("[pmc,%d] ri %d null pcd", __LINE__, ri)); *adjri = ri - pcd->pcd_ri; KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); return (pcd); } /* * Concurrency Control * * The driver manages the following data structures: * * - target process descriptors, one per target process * - owner process descriptors (and attached lists), one per owner process * - lookup hash tables for owner and target processes * - PMC descriptors (and attached lists) * - per-cpu hardware state * - the 'hook' variable through which the kernel calls into * this module * - the machine hardware state (managed by the MD layer) * * These data structures are accessed from: * * - thread context-switch code * - interrupt handlers (possibly on multiple cpus) * - kernel threads on multiple cpus running on behalf of user * processes doing system calls * - this driver's private kernel threads * * = Locks and Locking strategy = * * The driver uses four locking strategies for its operation: * * - The global SX lock "pmc_sx" is used to protect internal * data structures. * * Calls into the module by syscall() start with this lock being * held in exclusive mode. Depending on the requested operation, * the lock may be downgraded to 'shared' mode to allow more * concurrent readers into the module. Calls into the module from * other parts of the kernel acquire the lock in shared mode. * * This SX lock is held in exclusive mode for any operations that * modify the linkages between the driver's internal data structures. * * The 'pmc_hook' function pointer is also protected by this lock. * It is only examined with the sx lock held in exclusive mode. The * kernel module is allowed to be unloaded only with the sx lock held * in exclusive mode. In normal syscall handling, after acquiring the * pmc_sx lock we first check that 'pmc_hook' is non-null before * proceeding. This prevents races between the thread unloading the module * and other threads seeking to use the module. * * - Lookups of target process structures and owner process structures * cannot use the global "pmc_sx" SX lock because these lookups need * to happen during context switches and in other critical sections * where sleeping is not allowed. We protect these lookup tables * with their own private spin-mutexes, "pmc_processhash_mtx" and * "pmc_ownerhash_mtx". * * - Interrupt handlers work in a lock free manner. At interrupt * time, handlers look at the PMC pointer (phw->phw_pmc) configured * when the PMC was started. If this pointer is NULL, the interrupt * is ignored after updating driver statistics. We ensure that this * pointer is set (using an atomic operation if necessary) before the * PMC hardware is started. Conversely, this pointer is unset atomically * only after the PMC hardware is stopped. * * We ensure that everything needed for the operation of an * interrupt handler is available without it needing to acquire any * locks. We also ensure that a PMC's software state is destroyed only * after the PMC is taken off hardware (on all CPUs). * * - Context-switch handling with process-private PMCs needs more * care. * * A given process may be the target of multiple PMCs. For example, * PMCATTACH and PMCDETACH may be requested by a process on one CPU * while the target process is running on another. A PMC could also * be getting released because its owner is exiting. We tackle * these situations in the following manner: * * - each target process structure 'pmc_process' has an array * of 'struct pmc *' pointers, one for each hardware PMC. * * - At context switch IN time, each "target" PMC in RUNNING state * gets started on hardware and a pointer to each PMC is copied into * the per-cpu phw array. The 'runcount' for the PMC is * incremented. * * - At context switch OUT time, all process-virtual PMCs are stopped * on hardware. The saved value is added to the PMCs value field * only if the PMC is in a non-deleted state (the PMCs state could * have changed during the current time slice). * * Note that since in-between a switch IN on a processor and a switch * OUT, the PMC could have been released on another CPU. Therefore * context switch OUT always looks at the hardware state to turn * OFF PMCs and will update a PMC's saved value only if reachable * from the target process record. * * - OP PMCRELEASE could be called on a PMC at any time (the PMC could * be attached to many processes at the time of the call and could * be active on multiple CPUs). * * We prevent further scheduling of the PMC by marking it as in * state 'DELETED'. If the runcount of the PMC is non-zero then * this PMC is currently running on a CPU somewhere. The thread * doing the PMCRELEASE operation waits by repeatedly doing a * pause() till the runcount comes to zero. * * The contents of a PMC descriptor (struct pmc) are protected using * a spin-mutex. In order to save space, we use a mutex pool. * * In terms of lock types used by witness(4), we use: * - Type "pmc-sx", used by the global SX lock. * - Type "pmc-sleep", for sleep mutexes used by logger threads. * - Type "pmc-per-proc", for protecting PMC owner descriptors. * - Type "pmc-leaf", used for all other spin mutexes. */ /* * save the cpu binding of the current kthread */ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG0(CPU,BND,2, "save-cpu"); thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; thread_unlock(curthread); PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } /* * restore the cpu binding of the current thread */ static void pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); thread_unlock(curthread); PMCDBG0(CPU,BND,2, "restore-cpu done"); } /* * move execution over the specified cpu and bind it there. */ static void pmc_select_cpu(int cpu) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); /* Never move to an inactive CPU. */ KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " "CPU %d", __LINE__, cpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, cpu, curthread->td_oncpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); } /* * Force a context switch. * * We do this by pause'ing for 1 tick -- invoking mi_switch() is not * guaranteed to force a context switch. */ static void pmc_force_context_switch(void) { pause("pmcctx", 1); } /* * Get the file name for an executable. This is a simple wrapper * around vn_fullpath(9). */ static void pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) { *fullpath = "unknown"; *freepath = NULL; vn_fullpath(curthread, v, fullpath, freepath); } /* * remove an process owning PMCs */ void pmc_remove_owner(struct pmc_owner *po) { struct pmc *pm, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po); /* Remove descriptor from the owner hash table */ LIST_REMOVE(po, po_next); /* release all owned PMC descriptors */ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { PMCDBG1(OWN,ORM,2, "pmc=%p", pm); KASSERT(pm->pm_owner == po, ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); pmc_release_pmc_descriptor(pm); /* will unlink from the list */ pmc_destroy_pmc_descriptor(pm); } KASSERT(po->po_sscount == 0, ("[pmc,%d] SS count not zero", __LINE__)); KASSERT(LIST_EMPTY(&po->po_pmcs), ("[pmc,%d] PMC list not empty", __LINE__)); /* de-configure the log file if present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_deconfigure_log(po); } /* * remove an owner process record if all conditions are met. */ static void pmc_maybe_remove_owner(struct pmc_owner *po) { PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po); /* * Remove owner record if * - this process does not own any PMCs * - this process has not allocated a system-wide sampling buffer */ if (LIST_EMPTY(&po->po_pmcs) && ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } } /* * Add an association between a target process and a PMC. */ static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_target *pt; #ifdef INVARIANTS struct pmc_thread *pt_td; #endif sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", __LINE__, pm, pp->pp_proc->p_pid)); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), ("[pmc,%d] Illegal reference count %d for process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); #ifdef HWPMC_DEBUG LIST_FOREACH(pt, &pm->pm_targets, pt_next) if (pt->pt_process == pp) KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", __LINE__, pp, pm)); #endif pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO); pt->pt_process = pp; LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, (uintptr_t)pm); if (pm->pm_owner->po_owner == pp->pp_proc) pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; /* * Initialize the per-process values at this row index. */ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? pm->pm_sc.pm_reloadcount : 0; pp->pp_refcnt++; #ifdef INVARIANTS /* Confirm that the per-thread values at this row index are cleared. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt_td, &pp->pp_tds, pt_next) { KASSERT(pt_td->pt_pmcs[ri].pt_pmcval == (pmc_value_t) 0, ("[pmc,%d] pt_pmcval not cleared for pid=%d at " "ri=%d", __LINE__, pp->pp_proc->p_pid, ri)); } mtx_unlock_spin(pp->pp_tdslock); } #endif } /* * Removes the association between a target process and a PMC. */ static void pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct proc *p; struct pmc_target *ptgt; struct pmc_thread *pt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, ri, pm, pp->pp_pmcs[ri].pp_pmc)); pp->pp_pmcs[ri].pp_pmc = NULL; pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; /* Clear the per-thread values at this row index. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt, &pp->pp_tds, pt_next) pt->pt_pmcs[ri].pt_pmcval = (pmc_value_t) 0; mtx_unlock_spin(pp->pp_tdslock); } /* Remove owner-specific flags */ if (pm->pm_owner->po_owner == pp->pp_proc) { pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; } pp->pp_refcnt--; /* Remove the target process from the PMC structure */ LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) if (ptgt->pt_process == pp) break; KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); LIST_REMOVE(ptgt, pt_next); free(ptgt, M_PMC); /* if the PMC now lacks targets, send the owner a SIGIO */ if (LIST_EMPTY(&pm->pm_targets)) { p = pm->pm_owner->po_owner; PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p, SIGIO); } } /* * Check if PMC 'pm' may be attached to target process 't'. */ static int pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ int decline_attach, i; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) return 0; PROC_LOCK(o); oc = o->p_ucred; crhold(oc); PROC_UNLOCK(o); PROC_LOCK(t); tc = t->p_ucred; crhold(tc); PROC_UNLOCK(t); /* * The effective uid of the PMC owner should match at least one * of the {effective,real,saved} uids of the target process. */ decline_attach = oc->cr_uid != tc->cr_uid && oc->cr_uid != tc->cr_svuid && oc->cr_uid != tc->cr_ruid; /* * Every one of the target's group ids, must be in the owner's * group list. */ for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); /* check the read and saved gids too */ if (decline_attach == 0) decline_attach = !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); return !decline_attach; } /* * Attach a process to a PMC. */ static int pmc_attach_one_process(struct proc *p, struct pmc *pm) { int ri, error; char *fullpath, *freepath; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * Locate the process descriptor corresponding to process 'p', * allocating space as needed. * * Verify that rowindex 'pm_rowindex' is free in the process * descriptor. * * If not, allocate space for a descriptor and link the * process descriptor and PMC. */ ri = PMC_TO_ROWINDEX(pm); /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) { error = ENOMEM; goto fail; } if (pp->pp_pmcs[ri].pp_pmc == pm) {/* already present at slot [ri] */ error = EEXIST; goto fail; } if (pp->pp_pmcs[ri].pp_pmc != NULL) { error = EBUSY; goto fail; } pmc_link_target_process(pm, pp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) pm->pm_flags |= PMC_F_NEEDS_LOGFILE; pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ /* issue an attach event to a configured log file */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { if (p->p_flag & P_KPROC) { fullpath = kernelname; freepath = NULL; } else { pmc_getfilename(p->p_textvp, &fullpath, &freepath); pmclog_process_pmcattach(pm, p->p_pid, fullpath); } free(freepath, M_TEMP); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_process_mappings(pm->pm_owner, p); } return (0); fail: PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return (error); } /* * Attach a process and optionally its children */ static int pmc_attach_process(struct proc *p, struct pmc *pm) { int error; struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * If this PMC successfully allowed a GETMSR operation * in the past, disallow further ATTACHes. */ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) return EPERM; if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_attach_one_process(p, pm); /* * Traverse all child processes, attaching them to * this PMC. */ sx_slock(&proctree_lock); top = p; for (;;) { if ((error = pmc_attach_one_process(p, pm)) != 0) break; if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } if (error) (void) pmc_detach_process(top, pm); done: sx_sunlock(&proctree_lock); return error; } /* * Detach a process from a PMC. If there are no other PMCs tracking * this process, remove the process structure from its hash table. If * 'flags' contains PMC_FLAG_REMOVE, then free the process structure. */ static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) { int ri; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL, ("[pmc,%d] null pm pointer", __LINE__)); ri = PMC_TO_ROWINDEX(pm); PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", pm, ri, p, p->p_pid, p->p_comm, flags); if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) return ESRCH; if (pp->pp_pmcs[ri].pp_pmc != pm) return EINVAL; pmc_unlink_target_process(pm, pp); /* Issue a detach entry if a log file is configured */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcdetach(pm, p->p_pid); /* * If there are no PMCs targeting this process, we remove its * descriptor from the target hash table and unset the P_HWPMC * flag in the struct proc. */ KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal refcnt %d for process struct %p", __LINE__, pp->pp_refcnt, pp)); if (pp->pp_refcnt != 0) /* still a target of some PMC */ return 0; pmc_remove_process_descriptor(pp); if (flags & PMC_FLAG_REMOVE) pmc_destroy_process_descriptor(pp); PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Detach a process and optionally its descendants from a PMC. */ static int pmc_detach_process(struct proc *p, struct pmc *pm) { struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); /* * Traverse all children, detaching them from this PMC. We * ignore errors since we could be detaching a PMC from a * partially attached proc tree. */ sx_slock(&proctree_lock); top = p; for (;;) { (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); if (LIST_EMPTY(&pm->pm_targets)) pm->pm_flags &= ~PMC_F_ATTACH_DONE; return 0; } /* * Thread context switch IN */ static void pmc_process_csw_in(struct thread *td) { int cpu; unsigned int adjri, ri; struct pmc *pm; struct proc *p; struct pmc_cpu *pc; struct pmc_hw *phw; pmc_value_t newvalue; struct pmc_process *pp; struct pmc_thread *pt; struct pmc_classdep *pcd; p = td->td_proc; pt = NULL; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) return; KASSERT(pp->pp_proc == td->td_proc, ("[pmc,%d] not my thread state", __LINE__)); critical_enter(); /* no preemption from this point */ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Target PMC in non-virtual mode (%d)", __LINE__, PMC_TO_MODE(pm))); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] Row index mismatch pmc %d != ri %d", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Only PMCs that are marked as 'RUNNING' need * be placed on hardware. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* increment PMC runcount */ counter_u64_add(pm->pm_runcount, 1); /* configure the HWPMC we are going to use. */ pcd = pmc_ri_to_classdep(md, ri, &adjri); pcd->pcd_config_pmc(cpu, adjri, pm); phw = pc->pc_hwpmcs[ri]; KASSERT(phw != NULL, ("[pmc,%d] null hw pointer", __LINE__)); KASSERT(phw->phw_pmc == pm, ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, phw->phw_pmc, pm)); /* * Write out saved value and start the PMC. * * Sampling PMCs use a per-thread value, while * counting mode PMCs use a per-pmc value that is * inherited across descendants. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { if (pt == NULL) pt = pmc_find_thread_descriptor(pp, td, PMC_FLAG_NONE); KASSERT(pt != NULL, ("[pmc,%d] No thread found for td=%p", __LINE__, td)); mtx_pool_lock_spin(pmc_mtxpool, pm); /* * If we have a thread descriptor, use the per-thread * counter in the descriptor. If not, we will use * a per-process counter. * * TODO: Remove the per-process "safety net" once * we have thoroughly tested that we don't hit the * above assert. */ if (pt != NULL) { if (pt->pt_pmcs[ri].pt_pmcval > 0) newvalue = pt->pt_pmcs[ri].pt_pmcval; else newvalue = pm->pm_sc.pm_reloadcount; } else { /* * Use the saved value calculated after the most * recent time a thread using the shared counter * switched out. Reset the saved count in case * another thread from this process switches in * before any threads switch out. */ newvalue = pp->pp_pmcs[ri].pp_pmcval; pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount; } mtx_pool_unlock_spin(pmc_mtxpool, pm); KASSERT(newvalue > 0 && newvalue <= pm->pm_sc.pm_reloadcount, ("[pmc,%d] pmcval outside of expected range cpu=%d " "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__, cpu, ri, newvalue, pm->pm_sc.pm_reloadcount)); } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, PMC_TO_MODE(pm))); mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu, ri) = pm->pm_gv.pm_savedvalue; mtx_pool_unlock_spin(pmc_mtxpool, pm); } PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); pcd->pcd_write_pmc(cpu, adjri, newvalue); /* If a sampling mode PMC, reset stalled state. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. */ pm->pm_pcpu_state[cpu].pps_cpustate = 1; /* Start the PMC. */ pcd->pcd_start_pmc(cpu, adjri); } /* * perform any other architecture/cpu dependent thread * switch-in actions. */ (void) (*md->pmd_switch_in)(pc, pp); critical_exit(); } /* * Thread context switch OUT. */ static void pmc_process_csw_out(struct thread *td) { int cpu; int64_t tmp; struct pmc *pm; struct proc *p; enum pmc_mode mode; struct pmc_cpu *pc; pmc_value_t newvalue; unsigned int adjri, ri; struct pmc_process *pp; struct pmc_thread *pt = NULL; struct pmc_classdep *pcd; /* * Locate our process descriptor; this may be NULL if * this process is exiting and we have already removed * the process from the target process table. * * Note that due to kernel preemption, multiple * context switches may happen while the process is * exiting. * * Note also that if the target process cannot be * found we still need to deconfigure any PMCs that * are currently running on hardware. */ p = td->td_proc; pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); /* * save PMCs */ critical_enter(); cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; /* * When a PMC gets unlinked from a target PMC, it will * be removed from the target's pp_pmc[] array. * * However, on a MP system, the target could have been * executing on another CPU at the time of the unlink. * So, at context switch OUT time, we need to look at * the hardware to determine if a PMC is scheduled on * it. */ for (ri = 0; ri < md->pmd_npmc; ri++) { pcd = pmc_ri_to_classdep(md, ri, &adjri); pm = NULL; (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL) /* nothing at this row index */ continue; mode = PMC_TO_MODE(pm); if (!PMC_IS_VIRTUAL_MODE(mode)) continue; /* not a process virtual PMC */ KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Change desired state, and then stop if not stalled. * This two-step dance should avoid race conditions where * an interrupt re-enables the PMC after this code has * already checked the pm_stalled flag. */ pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (pm->pm_pcpu_state[cpu].pps_stalled == 0) pcd->pcd_stop_pmc(cpu, adjri); /* reduce this PMC's runcount */ counter_u64_add(pm->pm_runcount, -1); /* * If this PMC is associated with this process, * save the reading. */ if (pm->pm_state != PMC_STATE_DELETED && pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pp->pp_refcnt > 0, ("[pmc,%d] pp refcnt = %d", __LINE__, pp->pp_refcnt)); pcd->pcd_read_pmc(cpu, adjri, &newvalue); if (mode == PMC_MODE_TS) { PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d val=%jd (samp)", cpu, ri, newvalue); if (pt == NULL) pt = pmc_find_thread_descriptor(pp, td, PMC_FLAG_NONE); KASSERT(pt != NULL, ("[pmc,%d] No thread found for td=%p", __LINE__, td)); mtx_pool_lock_spin(pmc_mtxpool, pm); /* * If we have a thread descriptor, save the * per-thread counter in the descriptor. If not, * we will update the per-process counter. * * TODO: Remove the per-process "safety net" * once we have thoroughly tested that we * don't hit the above assert. */ if (pt != NULL) pt->pt_pmcs[ri].pt_pmcval = newvalue; else { /* * For sampling process-virtual PMCs, * newvalue is the number of events to * be seen until the next sampling * interrupt. We can just add the events * left from this invocation to the * counter, then adjust in case we * overflow our range. * * (Recall that we reload the counter * every time we use it.) */ pp->pp_pmcs[ri].pp_pmcval += newvalue; if (pp->pp_pmcs[ri].pp_pmcval > pm->pm_sc.pm_reloadcount) pp->pp_pmcs[ri].pp_pmcval -= pm->pm_sc.pm_reloadcount; } mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)", cpu, ri, tmp); /* * For counting process-virtual PMCs, * we expect the count to be * increasing monotonically, modulo a 64 * bit wraparound. */ KASSERT(tmp >= 0, ("[pmc,%d] negative increment cpu=%d " "ri=%d newvalue=%jx saved=%jx " "incr=%jx", __LINE__, cpu, ri, newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); if (pm->pm_flags & PMC_F_LOG_PROCCSW) pmclog_process_proccsw(pm, pp, tmp, td); } } /* mark hardware as free */ pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * perform any other architecture/cpu dependent thread * switch out functions. */ (void) (*md->pmd_switch_out)(pc, pp); critical_exit(); } /* * A new thread for a process. */ static void pmc_process_thread_add(struct thread *td) { struct pmc_process *pmc; pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE); if (pmc != NULL) pmc_find_thread_descriptor(pmc, td, PMC_FLAG_ALLOCATE); } /* * A thread delete for a process. */ static void pmc_process_thread_delete(struct thread *td) { struct pmc_process *pmc; pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE); if (pmc != NULL) pmc_thread_descriptor_pool_free(pmc_find_thread_descriptor(pmc, td, PMC_FLAG_REMOVE)); } /* * A userret() call for a thread. */ static void pmc_process_thread_userret(struct thread *td) { sched_pin(); pmc_capture_user_callchain(curcpu, PMC_UR, td->td_frame); sched_unpin(); } /* * A mapping change for a process. */ static void pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) { int ri; pid_t pid; char *fullpath, *freepath; const struct pmc *pm; struct pmc_owner *po; const struct pmc_process *pp; freepath = fullpath = NULL; MPASS(!in_epoch()); pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; epoch_enter_preempt(global_epoch_preempt); /* Inform owners of all system-wide sampling PMCs. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; /* * Inform sampling PMC owners tracking this process. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); done: if (freepath) free(freepath, M_TEMP); epoch_exit_preempt(global_epoch_preempt); } /* * Log an munmap request. */ static void pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) { int ri; pid_t pid; struct pmc_owner *po; const struct pmc *pm; const struct pmc_process *pp; pid = td->td_proc->p_pid; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); epoch_exit_preempt(global_epoch_preempt); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } /* * Log mapping information about the kernel. */ static void pmc_log_kernel_mappings(struct pmc *pm) { struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; MPASS(in_epoch() || sx_xlocked(&pmc_sx)); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); po = pm->pm_owner; if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) return; /* * Log the current set of kernel modules. */ kmbase = linker_hwpmc_list_objects(); for (km = kmbase; km->pm_file != NULL; km++) { PMCDBG2(LOG,REG,1,"%s %p", (char *) km->pm_file, (void *) km->pm_address); pmclog_process_map_in(po, (pid_t) -1, km->pm_address, km->pm_file); } free(kmbase, M_LINKER); po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; } /* * Log the mappings for a single process. */ static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) { vm_map_t map; struct vnode *vp; struct vmspace *vm; vm_map_entry_t entry; vm_offset_t last_end; u_int last_timestamp; struct vnode *last_vp; vm_offset_t start_addr; vm_object_t obj, lobj, tobj; char *fullpath, *freepath; last_vp = NULL; last_end = (vm_offset_t) 0; fullpath = freepath = NULL; if ((vm = vmspace_acquire_ref(p)) == NULL) return; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry == NULL) { PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " "NULL! pid=%d vm_map=%p\n", p->p_pid, map); break; } /* * We only care about executable map entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || !(entry->protection & VM_PROT_EXECUTE) || (entry->object.vm_object == NULL)) { continue; } obj = entry->object.vm_object; VM_OBJECT_RLOCK(obj); /* * Walk the backing_object list to find the base * (non-shadowed) vm_object. */ for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } /* * At this point lobj is the base vm_object and it is locked. */ if (lobj == NULL) { PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d " "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj); VM_OBJECT_RUNLOCK(obj); continue; } vp = vm_object_vnode(lobj); if (vp == NULL) { if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * Skip contiguous regions that point to the same * vnode, so we don't emit redundant MAP-IN * directives. */ if (entry->start == last_end && vp == last_vp) { last_end = entry->end; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * We don't want to keep the proc's vm_map or this * vm_object locked while we walk the pathname, since * vn_fullpath() can sleep. However, if we drop the * lock, it's possible for concurrent activity to * modify the vm_map list. To protect against this, * we save the vm_map timestamp before we release the * lock, and check it after we reacquire the lock * below. */ start_addr = entry->start; last_end = entry->end; last_timestamp = map->timestamp; vm_map_unlock_read(map); vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); freepath = NULL; pmc_getfilename(vp, &fullpath, &freepath); last_vp = vp; vrele(vp); vp = NULL; pmclog_process_map_in(po, p->p_pid, start_addr, fullpath); if (freepath) free(freepath, M_TEMP); vm_map_lock_read(map); /* * If our saved timestamp doesn't match, this means * that the vm_map was modified out from under us and * we can't trust our current "entry" pointer. Do a * new lookup for this entry. If there is no entry * for this address range, vm_map_lookup_entry() will * return the previous one, so we always want to go to * entry->next on the next loop iteration. * * There is an edge condition here that can occur if * there is no entry at or before this address. In * this situation, vm_map_lookup_entry returns * &map->header, which would cause our loop to abort * without processing the rest of the map. However, * in practice this will never happen for process * vm_map. This is because the executable's text * segment is the first mapping in the proc's address * space, and this mapping is never removed until the * process exits, so there will always be a non-header * entry at or before the requested address for * vm_map_lookup_entry to return. */ if (map->timestamp != last_timestamp) vm_map_lookup_entry(map, last_end - 1, &entry); } vm_map_unlock_read(map); vmspace_free(vm); return; } /* * Log mappings for all processes in the system. */ static void pmc_log_all_process_mappings(struct pmc_owner *po) { struct proc *p, *top; sx_assert(&pmc_sx, SX_XLOCKED); if ((p = pfind(1)) == NULL) panic("[pmc,%d] Cannot find init", __LINE__); PROC_UNLOCK(p); sx_slock(&proctree_lock); top = p; for (;;) { pmc_log_process_mappings(po, p); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); } /* * The 'hook' invoked from the kernel proper */ #ifdef HWPMC_DEBUG const char *pmc_hooknames[] = { /* these strings correspond to PMC_FN_* in */ "", "EXEC", "CSW-IN", "CSW-OUT", "SAMPLE", "UNUSED1", "UNUSED2", "MMAP", "MUNMAP", "CALLCHAIN-NMI", "CALLCHAIN-SOFT", "SOFTSAMPLING", "THR-CREATE", "THR-EXIT", "THR-USERRET", "THR-CREATE-LOG", "THR-EXIT-LOG", "PROC-CREATE-LOG" }; #endif static int pmc_hook_handler(struct thread *td, int function, void *arg) { int cpu; PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, pmc_hooknames[function], arg); switch (function) { /* * Process exec() */ case PMC_FN_PROCESS_EXEC: { char *fullpath, *freepath; unsigned int ri; int is_using_hwpmcs; struct pmc *pm; struct proc *p; struct pmc_owner *po; struct pmc_process *pp; struct pmckern_procexec *pk; sx_assert(&pmc_sx, SX_XLOCKED); p = td->td_proc; pmc_getfilename(p->p_textvp, &fullpath, &freepath); pk = (struct pmckern_procexec *) arg; epoch_enter_preempt(global_epoch_preempt); /* Inform owners of SS mode PMCs of the exec event. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); epoch_exit_preempt(global_epoch_preempt); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); if (!is_using_hwpmcs) { if (freepath) free(freepath, M_TEMP); break; } /* * PMCs are not inherited across an exec(): remove any * PMCs that this process is the owner of. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } /* * If the process being exec'ed is not the target of any * PMC, we are done. */ if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { if (freepath) free(freepath, M_TEMP); break; } /* * Log the exec event to all monitoring owners. Skip * owners who have already received the event because * they had system sampling PMCs active. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, pm->pm_id, p->p_pid, pk->pm_entryaddr, fullpath); } if (freepath) free(freepath, M_TEMP); PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", p, p->p_pid, p->p_comm, pk->pm_credentialschanged); if (pk->pm_credentialschanged == 0) /* no change */ break; /* * If the newly exec()'ed process has a different credential * than before, allow it to be the target of a PMC only if * the PMC's owner has sufficient privilege. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) if (pmc_can_attach(pm, td->td_proc) != 0) pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__, pp->pp_refcnt, pp)); /* * If this process is no longer the target of any * PMCs, we can remove the process entry and free * up space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); pmc_destroy_process_descriptor(pp); break; } } break; case PMC_FN_CSW_IN: pmc_process_csw_in(td); break; case PMC_FN_CSW_OUT: pmc_process_csw_out(td); break; /* * Process accumulated PC samples. * * This function is expected to be called by hardclock() for * each CPU that has accumulated PC samples. * * This function is to be executed on the CPU whose samples * are being processed. */ case PMC_FN_DO_SAMPLES: /* * Clear the cpu specific bit in the CPU mask before * do the rest of the processing. If the NMI handler * gets invoked after the "atomic_clear_int()" call * below but before "pmc_process_samples()" gets * around to processing the interrupt, then we will * come back here at the next hardclock() tick (and * may find nothing to do if "pmc_process_samples()" * had already processed the interrupt). We don't * lose the interrupt sample. */ DPCPU_SET(pmc_sampled, 0); cpu = PCPU_GET(cpuid); pmc_process_samples(cpu, PMC_HR); pmc_process_samples(cpu, PMC_SR); pmc_process_samples(cpu, PMC_UR); break; case PMC_FN_MMAP: pmc_process_mmap(td, (struct pmckern_map_in *) arg); break; case PMC_FN_MUNMAP: MPASS(in_epoch() || sx_xlocked(&pmc_sx)); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; case PMC_FN_PROC_CREATE_LOG: pmc_process_proccreate((struct proc *)arg); break; case PMC_FN_USER_CALLCHAIN: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *) arg); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_USER_CALLCHAIN_SOFT: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); cpu = PCPU_GET(cpuid); pmc_capture_user_callchain(cpu, PMC_SR, (struct trapframe *) arg); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_SOFT_SAMPLING: /* * Call soft PMC sampling intr. */ pmc_soft_intr((struct pmckern_soft *) arg); break; case PMC_FN_THR_CREATE: pmc_process_thread_add(td); pmc_process_threadcreate(td); break; case PMC_FN_THR_CREATE_LOG: pmc_process_threadcreate(td); break; case PMC_FN_THR_EXIT: KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_process_thread_delete(td); pmc_process_threadexit(td); break; case PMC_FN_THR_EXIT_LOG: pmc_process_threadexit(td); break; case PMC_FN_THR_USERRET: KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_process_thread_userret(td); break; default: #ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); #endif break; } return 0; } /* * allocate a 'struct pmc_owner' descriptor in the owner hash table. */ static struct pmc_owner * pmc_allocate_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; /* allocate space for N pointers and one descriptor struct */ po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO); po->po_owner = p; LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ TAILQ_INIT(&po->po_logbuffers); mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", p, p->p_pid, p->p_comm, po); return po; } static void pmc_destroy_owner_descriptor(struct pmc_owner *po) { PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); mtx_destroy(&po->po_mtx); free(po, M_PMC); } /* * Allocate a thread descriptor from the free pool. * * NOTE: This *can* return NULL. */ static struct pmc_thread * pmc_thread_descriptor_pool_alloc(void) { struct pmc_thread *pt; mtx_lock_spin(&pmc_threadfreelist_mtx); if ((pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) { LIST_REMOVE(pt, pt_next); pmc_threadfreelist_entries--; } mtx_unlock_spin(&pmc_threadfreelist_mtx); return (pt); } /* * Add a thread descriptor to the free pool. We use this instead of free() * to maintain a cache of free entries. Additionally, we can safely call * this function when we cannot call free(), such as in a critical section. * */ static void pmc_thread_descriptor_pool_free(struct pmc_thread *pt) { if (pt == NULL) return; memset(pt, 0, THREADENTRY_SIZE); mtx_lock_spin(&pmc_threadfreelist_mtx); LIST_INSERT_HEAD(&pmc_threadfreelist, pt, pt_next); pmc_threadfreelist_entries++; if (pmc_threadfreelist_entries > pmc_threadfreelist_max) GROUPTASK_ENQUEUE(&free_gtask); mtx_unlock_spin(&pmc_threadfreelist_mtx); } /* * A callout to manage the free list. */ static void pmc_thread_descriptor_pool_free_task(void *arg __unused) { struct pmc_thread *pt; LIST_HEAD(, pmc_thread) tmplist; int delta; LIST_INIT(&tmplist); /* Determine what changes, if any, we need to make. */ mtx_lock_spin(&pmc_threadfreelist_mtx); delta = pmc_threadfreelist_entries - pmc_threadfreelist_max; while (delta > 0 && (pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) { delta--; LIST_REMOVE(pt, pt_next); LIST_INSERT_HEAD(&tmplist, pt, pt_next); } mtx_unlock_spin(&pmc_threadfreelist_mtx); /* If there are entries to free, free them. */ while (!LIST_EMPTY(&tmplist)) { pt = LIST_FIRST(&tmplist); LIST_REMOVE(pt, pt_next); free(pt, M_PMC); } } /* * Drain the thread free pool, freeing all allocations. */ static void pmc_thread_descriptor_pool_drain() { struct pmc_thread *pt, *next; LIST_FOREACH_SAFE(pt, &pmc_threadfreelist, pt_next, next) { LIST_REMOVE(pt, pt_next); free(pt, M_PMC); } } /* * find the descriptor corresponding to thread 'td', adding or removing it * as specified by 'mode'. * * Note that this supports additional mode flags in addition to those * supported by pmc_find_process_descriptor(): * PMC_FLAG_NOWAIT: Causes the function to not wait for mallocs. * This makes it safe to call while holding certain other locks. */ static struct pmc_thread * pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td, uint32_t mode) { struct pmc_thread *pt = NULL, *ptnew = NULL; int wait_flag; KASSERT(td != NULL, ("[pmc,%d] called to add NULL td", __LINE__)); /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case prior to * acquiring the lock. */ if (mode & PMC_FLAG_ALLOCATE) { if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) { wait_flag = M_WAITOK; if ((mode & PMC_FLAG_NOWAIT) || in_epoch()) wait_flag = M_NOWAIT; ptnew = malloc(THREADENTRY_SIZE, M_PMC, wait_flag|M_ZERO); } } mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt, &pp->pp_tds, pt_next) if (pt->pt_td == td) break; if ((mode & PMC_FLAG_REMOVE) && pt != NULL) LIST_REMOVE(pt, pt_next); if ((mode & PMC_FLAG_ALLOCATE) && pt == NULL && ptnew != NULL) { pt = ptnew; ptnew = NULL; pt->pt_td = td; LIST_INSERT_HEAD(&pp->pp_tds, pt, pt_next); } mtx_unlock_spin(pp->pp_tdslock); if (ptnew != NULL) { free(ptnew, M_PMC); } return pt; } /* * Try to add thread descriptors for each thread in a process. */ static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp) { struct thread *curtd; struct pmc_thread **tdlist; int i, tdcnt, tdlistsz; KASSERT(!PROC_LOCKED(p), ("[pmc,%d] proc unexpectedly locked", __LINE__)); tdcnt = 32; restart: tdlistsz = roundup2(tdcnt, 32); tdcnt = 0; tdlist = malloc(sizeof(struct pmc_thread*) * tdlistsz, M_TEMP, M_WAITOK); PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, curtd) tdcnt++; if (tdcnt >= tdlistsz) { PROC_UNLOCK(p); free(tdlist, M_TEMP); goto restart; } /* * Try to add each thread to the list without sleeping. If unable, * add to a queue to retry after dropping the process lock. */ tdcnt = 0; FOREACH_THREAD_IN_PROC(p, curtd) { tdlist[tdcnt] = pmc_find_thread_descriptor(pp, curtd, PMC_FLAG_ALLOCATE|PMC_FLAG_NOWAIT); if (tdlist[tdcnt] == NULL) { PROC_UNLOCK(p); for (i = 0; i <= tdcnt; i++) pmc_thread_descriptor_pool_free(tdlist[i]); free(tdlist, M_TEMP); goto restart; } tdcnt++; } PROC_UNLOCK(p); free(tdlist, M_TEMP); } /* * find the descriptor corresponding to process 'p', adding or removing it * as specified by 'mode'. */ static struct pmc_process * pmc_find_process_descriptor(struct proc *p, uint32_t mode) { uint32_t hindex; struct pmc_process *pp, *ppnew; struct pmc_processhash *pph; hindex = PMC_HASH_PTR(p, pmc_processhashmask); pph = &pmc_processhash[hindex]; ppnew = NULL; /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we * cannot call malloc(9) once we hold a spin lock. */ if (mode & PMC_FLAG_ALLOCATE) ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO); mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) if (pp->pp_proc == p) break; if ((mode & PMC_FLAG_REMOVE) && pp != NULL) LIST_REMOVE(pp, pp_next); if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && ppnew != NULL) { ppnew->pp_proc = p; LIST_INIT(&ppnew->pp_tds); ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew); LIST_INSERT_HEAD(pph, ppnew, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); pp = ppnew; ppnew = NULL; /* Add thread descriptors for this process' current threads. */ pmc_add_thread_descriptors_from_proc(p, pp); } else mtx_unlock_spin(&pmc_processhash_mtx); if (ppnew != NULL) free(ppnew, M_PMC); return pp; } /* * remove a process descriptor from the process hash table. */ static void pmc_remove_process_descriptor(struct pmc_process *pp) { KASSERT(pp->pp_refcnt == 0, ("[pmc,%d] Removing process descriptor %p with count %d", __LINE__, pp, pp->pp_refcnt)); mtx_lock_spin(&pmc_processhash_mtx); LIST_REMOVE(pp, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); } /* * destroy a process descriptor. */ static void pmc_destroy_process_descriptor(struct pmc_process *pp) { struct pmc_thread *pmc_td; while ((pmc_td = LIST_FIRST(&pp->pp_tds)) != NULL) { LIST_REMOVE(pmc_td, pt_next); pmc_thread_descriptor_pool_free(pmc_td); } free(pp, M_PMC); } /* * find an owner descriptor corresponding to proc 'p' */ static struct pmc_owner * pmc_find_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; po = NULL; LIST_FOREACH(po, poh, po_next) if (po->po_owner == p) break; PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); return po; } /* * pmc_allocate_pmc_descriptor * * Allocate a pmc descriptor and initialize its * fields. */ static struct pmc * pmc_allocate_pmc_descriptor(void) { struct pmc *pmc; pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO); pmc->pm_runcount = counter_u64_alloc(M_WAITOK); pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO); PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); return pmc; } /* * Destroy a pmc descriptor. */ static void pmc_destroy_pmc_descriptor(struct pmc *pm) { KASSERT(pm->pm_state == PMC_STATE_DELETED || pm->pm_state == PMC_STATE_FREE, ("[pmc,%d] destroying non-deleted PMC", __LINE__)); KASSERT(LIST_EMPTY(&pm->pm_targets), ("[pmc,%d] destroying pmc with targets", __LINE__)); KASSERT(pm->pm_owner == NULL, ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); KASSERT(counter_u64_fetch(pm->pm_runcount) == 0, ("[pmc,%d] pmc has non-zero run count %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount))); counter_u64_free(pm->pm_runcount); free(pm->pm_pcpu_state, M_PMC); free(pm, M_PMC); } static void pmc_wait_for_pmc_idle(struct pmc *pm) { #ifdef HWPMC_DEBUG volatile int maxloop; maxloop = 100 * pmc_cpu_max(); #endif /* * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. */ pmclog_flush(pm->pm_owner, 1); while (counter_u64_fetch(pm->pm_runcount) > 0) { pmclog_flush(pm->pm_owner, 1); #ifdef HWPMC_DEBUG maxloop--; KASSERT(maxloop > 0, ("[pmc,%d] (ri%d, rc%ld) waiting too long for " "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm), (unsigned long)counter_u64_fetch(pm->pm_runcount))); #endif pmc_force_context_switch(); } } /* * This function does the following things: * * - detaches the PMC from hardware * - unlinks all target threads that were attached to it * - removes the PMC from its owner's list * - destroys the PMC private mutex * * Once this function completes, the given pmc pointer can be freed by * calling pmc_destroy_pmc_descriptor(). */ static void pmc_release_pmc_descriptor(struct pmc *pm) { enum pmc_mode mode; struct pmc_hw *phw; u_int adjri, ri, cpu; struct pmc_owner *po; struct pmc_binding pb; struct pmc_process *pp; struct pmc_classdep *pcd; struct pmc_target *ptgt, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mode = PMC_TO_MODE(pm); PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, mode); /* * First, we take the PMC off hardware. */ cpu = 0; if (PMC_IS_SYSTEM_MODE(mode)) { /* * A system mode PMC runs on a specific CPU. Switch * to this CPU and turn hardware off. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); pmc_select_cpu(cpu); /* switch off non-stalled CPUs */ pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (pm->pm_state == PMC_STATE_RUNNING && pm->pm_pcpu_state[cpu].pps_stalled == 0) { phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; KASSERT(phw->phw_pmc == pm, ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", __LINE__, ri, phw->phw_pmc, pm)); PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_stop_pmc(cpu, adjri); critical_exit(); } PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_config_pmc(cpu, adjri, NULL); critical_exit(); /* adjust the global and process count of SS mode PMCs */ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { po = pm->pm_owner; po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); CK_LIST_REMOVE(po, po_ssnext); epoch_wait_preempt(global_epoch_preempt); } } pm->pm_state = PMC_STATE_DELETED; pmc_restore_cpu_binding(&pb); /* * We could have references to this PMC structure in * the per-cpu sample queues. Wait for the queue to * drain. */ pmc_wait_for_pmc_idle(pm); } else if (PMC_IS_VIRTUAL_MODE(mode)) { /* * A virtual PMC could be running on multiple CPUs at * a given instant. * * By marking its state as DELETED, we ensure that * this PMC is never further scheduled on hardware. * * Then we wait till all CPUs are done with this PMC. */ pm->pm_state = PMC_STATE_DELETED; /* Wait for the PMCs runcount to come to zero. */ pmc_wait_for_pmc_idle(pm); /* * At this point the PMC is off all CPUs and cannot be * freshly scheduled onto a CPU. It is now safe to * unlink all targets from this PMC. If a * process-record's refcount falls to zero, we remove * it from the hash table. The module-wide SX lock * protects us from races. */ LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { pp = ptgt->pt_process; pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); /* * If the target process record shows that no * PMCs are attached to it, reclaim its space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); pmc_destroy_process_descriptor(pp); } } cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ } /* * Release any MD resources */ (void) pcd->pcd_release_pmc(cpu, adjri, pm); /* * Update row disposition */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) PMC_UNMARK_ROW_STANDALONE(ri); else PMC_UNMARK_ROW_THREAD(ri); /* unlink from the owner's list */ if (pm->pm_owner) { LIST_REMOVE(pm, pm_next); pm->pm_owner = NULL; } } /* * Register an owner and a pmc. */ static int pmc_register_owner(struct proc *p, struct pmc *pmc) { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) return ENOMEM; KASSERT(pmc->pm_owner == NULL, ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); pmc->pm_owner = po; LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcallocate(pmc); PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", po, pmc); return 0; } /* * Return the current row disposition: * == 0 => FREE * > 0 => PROCESS MODE * < 0 => SYSTEM MODE */ int pmc_getrowdisp(int ri) { return pmc_pmcdisp[ri]; } /* * Check if a PMC at row index 'ri' can be allocated to the current * process. * * Allocation can fail if: * - the current process is already being profiled by a PMC at index 'ri', * attached to it via OP_PMCATTACH. * - the current process has already allocated a PMC at index 'ri' * via OP_ALLOCATE. */ static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) { enum pmc_mode mode; struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); /* * We shouldn't have already allocated a process-mode PMC at * row index 'ri'. * * We shouldn't have allocated a system-wide PMC on the same * CPU and same RI. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (PMC_TO_ROWINDEX(pm) == ri) { mode = PMC_TO_MODE(pm); if (PMC_IS_VIRTUAL_MODE(mode)) return EEXIST; if (PMC_IS_SYSTEM_MODE(mode) && (int) PMC_TO_CPU(pm) == cpu) return EEXIST; } } /* * We also shouldn't be the target of any PMC at this index * since otherwise a PMC_ATTACH to ourselves will fail. */ if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) if (pp->pp_pmcs[ri].pp_pmc) return EEXIST; PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", p, p->p_pid, p->p_comm, ri); return 0; } /* * Check if a given PMC at row index 'ri' can be currently used in * mode 'mode'. */ static int pmc_can_allocate_row(int ri, enum pmc_mode mode) { enum pmc_disp disp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); if (PMC_IS_SYSTEM_MODE(mode)) disp = PMC_DISP_STANDALONE; else disp = PMC_DISP_THREAD; /* * check disposition for PMC row 'ri': * * Expected disposition Row-disposition Result * * STANDALONE STANDALONE or FREE proceed * STANDALONE THREAD fail * THREAD THREAD or FREE proceed * THREAD STANDALONE fail */ if (!PMC_ROW_DISP_IS_FREE(ri) && !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) return EBUSY; /* * All OK */ PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); return 0; } /* * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. */ static struct pmc * pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) { struct pmc *pm; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_id == pmcid) return pm; return NULL; } static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) { struct pmc *pm, *opm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid); if (PMC_ID_TO_ROWINDEX(pmcid) >= md->pmd_npmc) return (EINVAL); if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) { /* * In case of PMC_F_DESCENDANTS child processes we will not find * the current process in the owners hash list. Find the owner * process first and from there lookup the po. */ if ((pp = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) { return ESRCH; } else { opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc; if (opm == NULL) return ESRCH; if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) return ESRCH; po = opm->pm_owner; } } if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) return EINVAL; PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); *pmc = pm; return 0; } /* * Start a PMC. */ static int pmc_start(struct pmc *pm) { enum pmc_mode mode; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, error, cpu, ri; KASSERT(pm != NULL, ("[pmc,%d] null pm", __LINE__)); mode = PMC_TO_MODE(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); error = 0; PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); po = pm->pm_owner; /* * Disallow PMCSTART if a logfile is required but has not been * configured yet. */ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EDOOFUS); /* programming error */ /* * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { /* * If a PMCATTACH has never been done on this PMC, * attach it to its owner process. */ if (LIST_EMPTY(&pm->pm_targets)) error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : pmc_attach_process(po->po_owner, pm); /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ if (error == 0) { pm->pm_state = PMC_STATE_RUNNING; if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) pmc_force_context_switch(); } return (error); } /* * A system-wide PMC. * * Add the owner to the global list if this is a system-wide * sampling PMC. */ if (mode == PMC_MODE_SS) { /* * Log mapping information for all existing processes in the * system. Subsequent mappings are logged as they happen; * see pmc_process_mmap(). */ if (po->po_logprocmaps == 0) { pmc_log_all_process_mappings(po); po->po_logprocmaps = 1; } po->po_sscount++; if (po->po_sscount == 1) { atomic_add_rel_int(&pmc_ss_count, 1); CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); PMCDBG1(PMC,OPS,1, "po=%p in global list", po); } } /* * Move to the CPU associated with this * PMC, and start the hardware. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); /* * global PMCs are configured at allocation time * so write out the initial value and start the PMC. */ pm->pm_state = PMC_STATE_RUNNING; critical_enter(); if ((error = pcd->pcd_write_pmc(cpu, adjri, PMC_IS_SAMPLING_MODE(mode) ? pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial)) == 0) { /* If a sampling mode PMC, reset stalled state. */ if (PMC_IS_SAMPLING_MODE(mode)) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. Start it. */ pm->pm_pcpu_state[cpu].pps_cpustate = 1; error = pcd->pcd_start_pmc(cpu, adjri); } critical_exit(); pmc_restore_cpu_binding(&pb); return (error); } /* * Stop a PMC. */ static int pmc_stop(struct pmc *pm) { struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, cpu, error, ri; KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); pm->pm_state = PMC_STATE_STOPPED; /* * If the PMC is a virtual mode one, changing the state to * non-RUNNING is enough to ensure that the PMC never gets * scheduled. * * If this PMC is current running on a CPU, then it will * handled correctly at the time its target process is context * switched out. */ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) return 0; /* * A system-mode PMC. Move to the CPU associated with * this PMC, and stop the hardware. We update the * 'initial count' so that a subsequent PMCSTART will * resume counting from the current hardware count. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); if (!pmc_cpu_is_active(cpu)) return ENXIO; pmc_select_cpu(cpu); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); pm->pm_pcpu_state[cpu].pps_cpustate = 0; critical_enter(); if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0) error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial); critical_exit(); pmc_restore_cpu_binding(&pb); po = pm->pm_owner; /* remove this owner from the global list of SS PMC owners */ if (PMC_TO_MODE(pm) == PMC_MODE_SS) { po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); CK_LIST_REMOVE(po, po_ssnext); epoch_wait_preempt(global_epoch_preempt); PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); } } return (error); } static struct pmc_classdep * pmc_class_to_classdep(enum pmc_class class) { int n; for (n = 0; n < md->pmd_nclass; n++) if (md->pmd_classdep[n].pcd_class == class) return (&md->pmd_classdep[n]); return (NULL); } #if defined(HWPMC_DEBUG) && defined(KTR) static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , __PMC_OPS() NULL }; #endif /* * The syscall interface */ #define PMC_GET_SX_XLOCK(...) do { \ sx_xlock(&pmc_sx); \ if (pmc_hook == NULL) { \ sx_xunlock(&pmc_sx); \ return __VA_ARGS__; \ } \ } while (0) #define PMC_DOWNGRADE_SX() do { \ sx_downgrade(&pmc_sx); \ is_sx_downgraded = 1; \ } while (0) static int pmc_syscall_handler(struct thread *td, void *syscall_args) { int error, is_sx_downgraded, op; struct pmc_syscall_args *c; void *pmclog_proc_handle; void *arg; c = (struct pmc_syscall_args *)syscall_args; op = c->pmop_code; arg = c->pmop_data; /* PMC isn't set up yet */ if (pmc_hook == NULL) return (EINVAL); if (op == PMC_OP_CONFIGURELOG) { /* * We cannot create the logging process inside * pmclog_configure_log() because there is a LOR * between pmc_sx and process structure locks. * Instead, pre-create the process and ignite the loop * if everything is fine, otherwise direct the process * to exit. */ error = pmclog_proc_create(td, &pmclog_proc_handle); if (error != 0) goto done_syscall; } PMC_GET_SX_XLOCK(ENOSYS); is_sx_downgraded = 0; PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); error = 0; counter_u64_add(pmc_stats.pm_syscalls, 1); switch (op) { /* * Configure a log file. * * XXX This OP will be reworked. */ case PMC_OP_CONFIGURELOG: { struct proc *p; struct pmc *pm; struct pmc_owner *po; struct pmc_op_configurelog cl; if ((error = copyin(arg, &cl, sizeof(cl))) != 0) { pmclog_proc_ignite(pmclog_proc_handle, NULL); break; } /* mark this process as owning a log file */ p = td->td_proc; if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = ENOMEM; break; } /* * If a valid fd was passed in, try to configure that, * otherwise if 'fd' was less than zero and there was * a log file configured, flush its buffers and * de-configure it. */ if (cl.pm_logfd >= 0) { error = pmclog_configure_log(md, po, cl.pm_logfd); pmclog_proc_ignite(pmclog_proc_handle, error == 0 ? po : NULL); } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = pmclog_close(po); if (error == 0) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && pm->pm_state == PMC_STATE_RUNNING) pmc_stop(pm); error = pmclog_deconfigure_log(po); } } else { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = EINVAL; } } break; /* * Flush a log file. */ case PMC_OP_FLUSHLOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_flush(po, 0); } break; /* * Close a log file. */ case PMC_OP_CLOSELOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_close(po); } break; /* * Retrieve hardware configuration. */ case PMC_OP_GETCPUINFO: /* CPU information */ { struct pmc_op_getcpuinfo gci; struct pmc_classinfo *pci; struct pmc_classdep *pcd; int cl; gci.pm_cputype = md->pmd_cputype; gci.pm_ncpu = pmc_cpu_max(); gci.pm_npmc = md->pmd_npmc; gci.pm_nclass = md->pmd_nclass; pci = gci.pm_classes; pcd = md->pmd_classdep; for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { pci->pm_caps = pcd->pcd_caps; pci->pm_class = pcd->pcd_class; pci->pm_width = pcd->pcd_width; pci->pm_num = pcd->pcd_num; } error = copyout(&gci, arg, sizeof(gci)); } break; /* * Retrieve soft events list. */ case PMC_OP_GETDYNEVENTINFO: { enum pmc_class cl; enum pmc_event ev; struct pmc_op_getdyneventinfo *gei; struct pmc_dyn_event_descr dev; struct pmc_soft *ps; uint32_t nevent; sx_assert(&pmc_sx, SX_LOCKED); gei = (struct pmc_op_getdyneventinfo *) arg; if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) break; /* Only SOFT class is dynamic. */ if (cl != PMC_CLASS_SOFT) { error = EINVAL; break; } nevent = 0; for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { ps = pmc_soft_ev_acquire(ev); if (ps == NULL) continue; bcopy(&ps->ps_ev, &dev, sizeof(dev)); pmc_soft_ev_release(ps); error = copyout(&dev, &gei->pm_events[nevent], sizeof(struct pmc_dyn_event_descr)); if (error != 0) break; nevent++; } if (error != 0) break; error = copyout(&nevent, &gei->pm_nevent, sizeof(nevent)); } break; /* * Get module statistics */ case PMC_OP_GETDRIVERSTATS: { struct pmc_op_getdriverstats gms; #define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field) CFETCH(gms, pmc_stats, pm_intr_ignored); CFETCH(gms, pmc_stats, pm_intr_processed); CFETCH(gms, pmc_stats, pm_intr_bufferfull); CFETCH(gms, pmc_stats, pm_syscalls); CFETCH(gms, pmc_stats, pm_syscall_errors); CFETCH(gms, pmc_stats, pm_buffer_requests); CFETCH(gms, pmc_stats, pm_buffer_requests_failed); CFETCH(gms, pmc_stats, pm_log_sweeps); #undef CFETCH error = copyout(&gms, arg, sizeof(gms)); } break; /* * Retrieve module version number */ case PMC_OP_GETMODULEVERSION: { uint32_t cv, modv; /* retrieve the client's idea of the ABI version */ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) break; /* don't service clients newer than our driver */ modv = PMC_VERSION; if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { error = EPROGMISMATCH; break; } error = copyout(&modv, arg, sizeof(int)); } break; /* * Retrieve the state of all the PMCs on a given * CPU. */ case PMC_OP_GETPMCINFO: { int ari; struct pmc *pm; size_t pmcinfo_size; uint32_t cpu, n, npmc; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_info *p, *pmcinfo; struct pmc_op_getpmcinfo *gpi; PMC_DOWNGRADE_SX(); gpi = (struct pmc_op_getpmcinfo *) arg; if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) break; if (cpu >= pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* switch to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); npmc = md->pmd_npmc; pmcinfo_size = npmc * sizeof(struct pmc_info); pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); p = pmcinfo; for (n = 0; n < md->pmd_npmc; n++, p++) { pcd = pmc_ri_to_classdep(md, n, &ari); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) break; if (PMC_ROW_DISP_IS_STANDALONE(n)) p->pm_rowdisp = PMC_DISP_STANDALONE; else if (PMC_ROW_DISP_IS_THREAD(n)) p->pm_rowdisp = PMC_DISP_THREAD; else p->pm_rowdisp = PMC_DISP_FREE; p->pm_ownerpid = -1; if (pm == NULL) /* no PMC associated */ continue; po = pm->pm_owner; KASSERT(po->po_owner != NULL, ("[pmc,%d] pmc_owner had a null proc pointer", __LINE__)); p->pm_ownerpid = po->po_owner->p_pid; p->pm_mode = PMC_TO_MODE(pm); p->pm_event = pm->pm_event; p->pm_flags = pm->pm_flags; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) p->pm_reloadcount = pm->pm_sc.pm_reloadcount; } pmc_restore_cpu_binding(&pb); /* now copy out the PMC info collected */ if (error == 0) error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); free(pmcinfo, M_PMC); } break; /* * Set the administrative state of a PMC. I.e. whether * the PMC is to be used or not. */ case PMC_OP_PMCADMIN: { int cpu, ri; enum pmc_state request; struct pmc_cpu *pc; struct pmc_hw *phw; struct pmc_op_pmcadmin pma; struct pmc_binding pb; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); error = priv_check(td, PRIV_PMC_MANAGE); if (error) break; if ((error = copyin(arg, &pma, sizeof(pma))) != 0) break; cpu = pma.pm_cpu; if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } request = pma.pm_state; if (request != PMC_STATE_DISABLED && request != PMC_STATE_FREE) { error = EINVAL; break; } ri = pma.pm_pmc; /* pmc id == row index */ if (ri < 0 || ri >= (int) md->pmd_npmc) { error = EINVAL; break; } /* * We can't disable a PMC with a row-index allocated * for process virtual PMCs. */ if (PMC_ROW_DISP_IS_THREAD(ri) && request == PMC_STATE_DISABLED) { error = EBUSY; break; } /* * otherwise, this PMC on this CPU is either free or * in system-wide mode. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); pc = pmc_pcpu[cpu]; phw = pc->pc_hwpmcs[ri]; /* * XXX do we need some kind of 'forced' disable? */ if (phw->phw_pmc == NULL) { if (request == PMC_STATE_DISABLED && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; PMC_MARK_ROW_STANDALONE(ri); } else if (request == PMC_STATE_FREE && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; PMC_UNMARK_ROW_STANDALONE(ri); } /* other cases are a no-op */ } else error = EBUSY; pmc_restore_cpu_binding(&pb); } break; /* * Allocate a PMC. */ case PMC_OP_PMCALLOCATE: { int adjri, n; u_int cpu; uint32_t caps; struct pmc *pmc; enum pmc_mode mode; struct pmc_hw *phw; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_op_pmcallocate pa; if ((error = copyin(arg, &pa, sizeof(pa))) != 0) break; caps = pa.pm_caps; mode = pa.pm_mode; cpu = pa.pm_cpu; if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && mode != PMC_MODE_TS && mode != PMC_MODE_TC) || (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { error = EINVAL; break; } /* * Virtual PMCs should only ask for a default CPU. * System mode PMCs need to specify a non-default CPU. */ if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { error = EINVAL; break; } /* * Check that an inactive CPU is not being asked for. */ if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* * Refuse an allocation for a system-wide PMC if this * process has been jailed, or if this process lacks * super-user credentials and the sysctl tunable * 'security.bsd.unprivileged_syspmcs' is zero. */ if (PMC_IS_SYSTEM_MODE(mode)) { if (jailed(curthread->td_ucred)) { error = EPERM; break; } if (!pmc_unprivileged_syspmcs) { error = priv_check(curthread, PRIV_PMC_SYSTEM); if (error) break; } } /* * Look for valid values for 'pm_flags' */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) != 0) { error = EINVAL; break; } /* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN */ if ((pa.pm_flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) == PMC_F_USERCALLCHAIN) { error = EINVAL; break; } /* PMC_F_USERCALLCHAIN is only valid for sampling mode */ if (pa.pm_flags & PMC_F_USERCALLCHAIN && mode != PMC_MODE_TS && mode != PMC_MODE_SS) { error = EINVAL; break; } /* process logging options are not allowed for system PMCs */ if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { error = EINVAL; break; } /* * All sampling mode PMCs need to be able to interrupt the * CPU. */ if (PMC_IS_SAMPLING_MODE(mode)) caps |= PMC_CAP_INTERRUPT; /* A valid class specifier should have been passed in. */ pcd = pmc_class_to_classdep(pa.pm_class); if (pcd == NULL) { error = EINVAL; break; } /* The requested PMC capabilities should be feasible. */ if ((pcd->pcd_caps & caps) != caps) { error = EOPNOTSUPP; break; } PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", pa.pm_ev, caps, mode, cpu); pmc = pmc_allocate_pmc_descriptor(); pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, PMC_ID_INVALID); pmc->pm_event = pa.pm_ev; pmc->pm_state = PMC_STATE_FREE; pmc->pm_caps = caps; pmc->pm_flags = pa.pm_flags; + /* XXX set lower bound on sampling for process counters */ + if (PMC_IS_SAMPLING_MODE(mode)) + pmc->pm_sc.pm_reloadcount = pa.pm_count; + else + pmc->pm_sc.pm_initial = pa.pm_count; + /* switch thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); #define PMC_IS_SHAREABLE_PMC(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ PMC_PHW_FLAG_IS_SHAREABLE) #define PMC_IS_UNALLOCATED(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) if (PMC_IS_SYSTEM_MODE(mode)) { pmc_select_cpu(cpu); for (n = pcd->pcd_ri; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, cpu) == 0 && (PMC_IS_UNALLOCATED(cpu, n) || PMC_IS_SHAREABLE_PMC(cpu, n)) && pcd->pcd_allocate_pmc(cpu, adjri, pmc, &pa) == 0) break; } } else { /* Process virtual mode */ for (n = pcd->pcd_ri; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, PMC_CPU_ANY) == 0 && pcd->pcd_allocate_pmc(curthread->td_oncpu, adjri, pmc, &pa) == 0) break; } } #undef PMC_IS_UNALLOCATED #undef PMC_IS_SHAREABLE_PMC pmc_restore_cpu_binding(&pb); if (n == (int) md->pmd_npmc) { pmc_destroy_pmc_descriptor(pmc); pmc = NULL; error = EINVAL; break; } /* Fill in the correct value in the ID field */ pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); /* Process mode PMCs with logging enabled need log files */ if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* All system mode sampling PMCs require a log file */ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* * Configure global pmc's immediately */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; pcd = pmc_ri_to_classdep(md, n, &adjri); if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { (void) pcd->pcd_release_pmc(cpu, adjri, pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; pmc_restore_cpu_binding(&pb); error = EPERM; break; } pmc_restore_cpu_binding(&pb); } pmc->pm_state = PMC_STATE_ALLOCATED; pmc->pm_class = pa.pm_class; /* * mark row disposition */ if (PMC_IS_SYSTEM_MODE(mode)) PMC_MARK_ROW_STANDALONE(n); else PMC_MARK_ROW_THREAD(n); /* * Register this PMC with the current thread as its owner. */ if ((error = pmc_register_owner(curthread->td_proc, pmc)) != 0) { pmc_release_pmc_descriptor(pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; break; } if (mode == PMC_MODE_SS) pmc_process_allproc(pmc); /* * Return the allocated index. */ pa.pm_pmcid = pmc->pm_id; error = copyout(&pa, arg, sizeof(pa)); } break; /* * Attach a PMC to a process. */ case PMC_OP_PMCATTACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { error = EINVAL; break; } /* PMCs may be (re)attached only when allocated or stopped */ if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } else if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED) { error = EINVAL; break; } /* lookup pid */ if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Ignore processes that are working on exiting. */ if (p->p_flag & P_WEXIT) { error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ break; } /* * we are allowed to attach a PMC to a process if * we can debug it. */ error = p_candebug(curthread, p); PROC_UNLOCK(p); if (error == 0) error = pmc_attach_process(p, pm); } break; /* * Detach an attached PMC from a process. */ case PMC_OP_PMCDETACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Treat processes that are in the process of exiting * as if they were not present. */ if (p->p_flag & P_WEXIT) error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ if (error == 0) error = pmc_detach_process(p, pm); } break; /* * Retrieve the MSR number associated with the counter * 'pmc_id'. This allows processes to directly use RDPMC * instructions to read their PMCs, without the overhead of a * system call. */ case PMC_OP_PMCGETMSR: { int adjri, ri; struct pmc *pm; struct pmc_target *pt; struct pmc_op_getmsr gm; struct pmc_classdep *pcd; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &gm, sizeof(gm))) != 0) break; if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) break; /* * The allocated PMC has to be a process virtual PMC, * i.e., of type MODE_T[CS]. Global PMCs can only be * read using the PMCREAD operation since they may be * allocated on a different CPU than the one we could * be running on at the time of the RDPMC instruction. * * The GETMSR operation is not allowed for PMCs that * are inherited across processes. */ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || (pm->pm_flags & PMC_F_DESCENDANTS)) { error = EINVAL; break; } /* * It only makes sense to use a RDPMC (or its * equivalent instruction on non-x86 architectures) on * a process that has allocated and attached a PMC to * itself. Conversely the PMC is only allowed to have * one process attached to it -- its owner. */ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || LIST_NEXT(pt, pt_next) != NULL || pt->pt_process->pp_proc != pm->pm_owner->po_owner) { error = EINVAL; break; } ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); /* PMC class has no 'GETMSR' support */ if (pcd->pcd_get_msr == NULL) { error = ENOSYS; break; } if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) break; if ((error = copyout(&gm, arg, sizeof(gm))) < 0) break; /* * Mark our process as using MSRs. Update machine * state using a forced context switch. */ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; pmc_force_context_switch(); } break; /* * Release an allocated PMC */ case PMC_OP_PMCRELEASE: { pmc_id_t pmcid; struct pmc *pm; struct pmc_owner *po; struct pmc_op_simple sp; /* * Find PMC pointer for the named PMC. * * Use pmc_release_pmc_descriptor() to switch off the * PMC, remove all its target threads, and remove the * PMC from its owner's list. * * Remove the owner record if this is the last PMC * owned. * * Free up space. */ if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; po = pm->pm_owner; pmc_release_pmc_descriptor(pm); pmc_maybe_remove_owner(po); pmc_destroy_pmc_descriptor(pm); } break; /* * Read and/or write a PMC. */ case PMC_OP_PMCRW: { int adjri; struct pmc *pm; uint32_t cpu, ri; pmc_value_t oldvalue; struct pmc_binding pb; struct pmc_op_pmcrw prw; struct pmc_classdep *pcd; struct pmc_op_pmcrw *pprw; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &prw, sizeof(prw))) != 0) break; ri = 0; PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, prw.pm_flags); /* must have at least one flag set */ if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { error = EINVAL; break; } /* locate pmc descriptor */ if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) break; /* Can't read a PMC that hasn't been started. */ if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } /* writing a new value is allowed only for 'STOPPED' pmcs */ if (pm->pm_state == PMC_STATE_RUNNING && (prw.pm_flags & PMC_F_NEWVALUE)) { error = EBUSY; break; } if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { /* * If this PMC is attached to its owner (i.e., * the process requesting this operation) and * is running, then attempt to get an * upto-date reading from hardware for a READ. * Writes are only allowed when the PMC is * stopped, so only update the saved value * field. * * If the PMC is not running, or is not * attached to its owner, read/write to the * savedvalue field. */ ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mtx_pool_lock_spin(pmc_mtxpool, pm); cpu = curthread->td_oncpu; if (prw.pm_flags & PMC_F_OLDVALUE) { if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && (pm->pm_state == PMC_STATE_RUNNING)) error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue); else oldvalue = pm->pm_gv.pm_savedvalue; } if (prw.pm_flags & PMC_F_NEWVALUE) pm->pm_gv.pm_savedvalue = prw.pm_value; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* System mode PMCs */ cpu = PMC_TO_CPU(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* move this thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); critical_enter(); /* save old value */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue))) goto error; /* write out new value */ if (prw.pm_flags & PMC_F_NEWVALUE) error = (*pcd->pcd_write_pmc)(cpu, adjri, prw.pm_value); error: critical_exit(); pmc_restore_cpu_binding(&pb); if (error) break; } pprw = (struct pmc_op_pmcrw *) arg; #ifdef HWPMC_DEBUG if (prw.pm_flags & PMC_F_NEWVALUE) PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx", ri, prw.pm_value, oldvalue); else if (prw.pm_flags & PMC_F_OLDVALUE) PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); #endif /* return old value if requested */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = copyout(&oldvalue, &pprw->pm_value, sizeof(prw.pm_value)))) break; } break; /* * Set the sampling rate for a sampling mode PMC and the * initial count for a counting mode PMC. */ case PMC_OP_PMCSETCOUNT: { struct pmc *pm; struct pmc_op_pmcsetcount sc; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sc, sizeof(sc))) != 0) break; if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) break; if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pm->pm_sc.pm_reloadcount = sc.pm_count; else pm->pm_sc.pm_initial = sc.pm_count; } break; /* * Start a PMC. */ case PMC_OP_PMCSTART: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmcid %x != id %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ break; else if (pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_ALLOCATED) { error = EINVAL; break; } error = pmc_start(pm); } break; /* * Stop a PMC. */ case PMC_OP_PMCSTOP: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; /* * Mark the PMC as inactive and invoke the MD stop * routines if needed. */ if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ break; else if (pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } error = pmc_stop(pm); } break; /* * Write a user supplied value to the log file. */ case PMC_OP_WRITELOG: { struct pmc_op_writelog wl; struct pmc_owner *po; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &wl, sizeof(wl))) != 0) break; if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; break; } error = pmclog_process_userlog(po, &wl); } break; default: error = EINVAL; break; } if (is_sx_downgraded) sx_sunlock(&pmc_sx); else sx_xunlock(&pmc_sx); done_syscall: if (error) counter_u64_add(pmc_stats.pm_syscall_errors, 1); return (error); } /* * Helper functions */ /* * Mark the thread as needing callchain capture and post an AST. The * actual callchain capture will be done in a context where it is safe * to take page faults. */ static void pmc_post_callchain_callback(void) { struct thread *td; td = curthread; /* * If there is multiple PMCs for the same interrupt ignore new post */ if (td->td_pflags & TDP_CALLCHAIN) return; /* * Mark this thread as needing callchain capture. * `td->td_pflags' will be safe to touch because this thread * was in user space when it was interrupted. */ td->td_pflags |= TDP_CALLCHAIN; /* * Don't let this thread migrate between CPUs until callchain * capture completes. */ sched_pin(); return; } /* * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ static int pmc_add_sample(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace) { int error, callchaindepth; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; error = 0; /* * Allocate space for a sample buffer. */ psb = pmc_pcpu[cpu]->pc_sb[ring]; ps = psb->ps_write; if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { counter_u64_add(ps->ps_pmc->pm_runcount, -1); counter_u64_add(pmc_stats.pm_overwrites, 1); ps->ps_nsamples = 0; } else if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ pm->pm_pcpu_state[cpu].pps_stalled = 1; counter_u64_add(pmc_stats.pm_intr_bufferfull, 1); PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); callchaindepth = 1; error = ENOMEM; goto done; } /* Fill in entry. */ PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0, ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm, (unsigned long)counter_u64_fetch(pm->pm_runcount))); counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */ td = curthread; ps->ps_pmc = pm; ps->ps_td = td; ps->ps_pid = td->td_proc->p_pid; ps->ps_tid = td->td_tid; ps->ps_cpu = cpu; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; if (callchaindepth == 1) ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); else { /* * Kernel stack traversals can be done immediately, * while we defer to an AST for user space traversals. */ if (!inuserspace) { callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, callchaindepth, tf); } else { pmc_post_callchain_callback(); callchaindepth = PMC_SAMPLE_INUSE; } } ps->ps_nsamples = callchaindepth; /* mark entry as in use */ if (ring == PMC_UR) { ps->ps_nsamples_actual = callchaindepth; /* mark entry as in use */ ps->ps_nsamples = PMC_SAMPLE_INUSE; } else ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; if (ps == psb->ps_fence) psb->ps_write = psb->ps_samples; else psb->ps_write = ps; done: /* mark CPU as needing processing */ if (callchaindepth != PMC_SAMPLE_INUSE) DPCPU_SET(pmc_sampled, 1); return (error); } /* * Interrupt processing. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ int pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace) { struct thread *td; td = curthread; if ((pm->pm_flags & PMC_F_USERCALLCHAIN) && (td->td_proc->p_flag & P_KPROC) == 0 && !inuserspace) { atomic_add_int(&curthread->td_pmcpend, 1); return (pmc_add_sample(cpu, PMC_UR, pm, tf, 0)); } return (pmc_add_sample(cpu, ring, pm, tf, inuserspace)); } /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets * rescheduled. */ static void pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) { struct pmc *pm; struct thread *td; struct pmc_sample *ps, *ps_end; struct pmc_samplebuffer *psb; int nsamples, nrecords, pass; #ifdef INVARIANTS int ncallchains; int nfree; #endif psb = pmc_pcpu[cpu]->pc_sb[ring]; td = curthread; KASSERT(td->td_pflags & TDP_CALLCHAIN, ("[pmc,%d] Retrieving callchain for thread that doesn't want it", __LINE__)); #ifdef INVARIANTS ncallchains = 0; nfree = 0; #endif nrecords = INT_MAX; pass = 0; restart: if (ring == PMC_UR) nrecords = atomic_readandclear_32(&td->td_pmcpend); /* * Iterate through all deferred callchain requests. * Walk from the current read pointer to the current * write pointer. */ ps = psb->ps_read; ps_end = psb->ps_write; do { #ifdef INVARIANTS if (ps->ps_nsamples == PMC_SAMPLE_FREE) { nfree++; goto next; } if ((ps->ps_pmc == NULL) || (ps->ps_pmc->pm_state != PMC_STATE_RUNNING)) nfree++; #endif if (ps->ps_nsamples != PMC_SAMPLE_INUSE) goto next; if (ps->ps_td != td) goto next; KASSERT(ps->ps_cpu == cpu, ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, ps->ps_cpu, PCPU_GET(cpuid))); pm = ps->ps_pmc; KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, ("[pmc,%d] Retrieving callchain for PMC that doesn't " "want it", __LINE__)); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount))); if (ring == PMC_UR) { nsamples = ps->ps_nsamples_actual; counter_u64_add(pmc_stats.pm_merges, 1); } else nsamples = 0; /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. */ #ifdef INVARIANTS ncallchains++; #endif if (__predict_true(nsamples < pmc_callchaindepth - 1)) nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples, pmc_callchaindepth - nsamples - 1, tf); wmb(); ps->ps_nsamples = nsamples; if (nrecords-- == 1) break; next: /* increment the pointer, modulo sample ring size */ if (++ps == psb->ps_fence) ps = psb->ps_samples; } while (ps != ps_end); if (__predict_false(ring == PMC_UR && td->td_pmcpend)) { if (pass == 0) { pass = 1; goto restart; } /* only collect samples for this part once */ td->td_pmcpend = 0; } #ifdef INVARIANTS if (ring == PMC_HR) KASSERT(ncallchains > 0 || nfree > 0, ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, cpu)); #endif /* mark CPU as needing processing */ DPCPU_SET(pmc_sampled, 1); } static void pmc_flush_ring(int cpu, int ring) { struct pmc *pm; struct pmc_sample *ps; struct pmc_samplebuffer *psb; int n; psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) goto next; pm = ps->ps_pmc; counter_u64_add(pm->pm_runcount, -1); ps->ps_nsamples = PMC_SAMPLE_FREE; /* increment read pointer, modulo sample size */ next: if (++ps == psb->ps_fence) psb->ps_read = psb->ps_samples; else psb->ps_read = ps; } } void pmc_flush_samples(int cpu) { int n; for (n = 0; n < PMC_NUM_SR; n++) pmc_flush_ring(cpu, n); } /* * Process saved PC samples. */ static void pmc_process_samples(int cpu, int ring) { struct pmc *pm; int adjri, n; struct thread *td; struct pmc_owner *po; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *psb; KASSERT(PCPU_GET(cpuid) == cpu, ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, PCPU_GET(cpuid), cpu)); psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) break; pm = ps->ps_pmc; KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm, (unsigned long)counter_u64_fetch(pm->pm_runcount))); po = pm->pm_owner; KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, pm, PMC_TO_MODE(pm))); /* Ignore PMCs that have been switched off */ if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; /* If there is a pending AST wait for completion */ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ DPCPU_SET(pmc_sampled, 1); break; } PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, pm, ps->ps_nsamples, ps->ps_flags, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); /* * If this is a process-mode PMC that is attached to * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. * * Otherwise, this is either a sampling-mode PMC that * is attached to a different process than its owner, * or a system-wide sampling PMC. Dispatch a log * entry to the PMC's owner process. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); addupc_intr(td, ps->ps_pc[0], 1); } } else pmclog_process_callchain(pm, ps); entrydone: ps->ps_nsamples = 0; /* mark entry as free */ counter_u64_add(pm->pm_runcount, -1); /* increment read pointer, modulo sample size */ if (++ps == psb->ps_fence) psb->ps_read = psb->ps_samples; else psb->ps_read = ps; } counter_u64_add(pmc_stats.pm_log_sweeps, 1); /* Do not re-enable stalled PMCs if we failed to process any samples */ if (n == 0) return; /* * Restart any stalled sampling PMCs on this CPU. * * If the NMI handler sets the pm_stalled field of a PMC after * the check below, we'll end up processing the stalled PMC at * the next hardclock tick. */ for (n = 0; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); (void) (*pcd->pcd_get_config)(cpu,adjri,&pm); if (pm == NULL || /* !cfg'ed */ pm->pm_state != PMC_STATE_RUNNING || /* !active */ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ !pm->pm_pcpu_state[cpu].pps_cpustate || /* !desired */ !pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */ continue; pm->pm_pcpu_state[cpu].pps_stalled = 0; (*pcd->pcd_start_pmc)(cpu, adjri); } } /* * Event handlers. */ /* * Handle a process exit. * * Remove this process from all hash tables. If this process * owned any PMCs, turn off those PMCs and deallocate them, * removing any associations with target processes. * * This function will be called by the last 'thread' of a * process. * * XXX This eventhandler gets called early in the exit process. * Consider using a 'hook' invocation from thread_exit() or equivalent * spot. Another negative is that kse_exit doesn't seem to call * exit1() [??]. * */ static void pmc_process_exit(void *arg __unused, struct proc *p) { struct pmc *pm; int adjri, cpu; unsigned int ri; int is_using_hwpmcs; struct pmc_owner *po; struct pmc_process *pp; struct pmc_classdep *pcd; pmc_value_t newvalue, tmp; PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); /* * Log a sysexit event to all SS PMC owners. */ epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_sysexit(po, p->p_pid); epoch_exit_preempt(global_epoch_preempt); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); /* * Since this code is invoked by the last thread in an exiting * process, we would have context switched IN at some prior * point. However, with PREEMPTION, kernel mode context * switches may happen any time, so we want to disable a * context switch OUT till we get any PMCs targeting this * process off the hardware. * * We also need to atomically remove this process' * entry from our target process hash table, using * PMC_FLAG_REMOVE. */ PMCDBG3(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); critical_enter(); /* no preemption */ cpu = curthread->td_oncpu; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_REMOVE)) != NULL) { PMCDBG2(PRC,EXT,2, "process-exit proc=%p pmc-process=%p", p, pp); /* * The exiting process could the target of * some PMCs which will be running on * currently executing CPU. * * We need to turn these PMCs off like we * would do at context switch OUT time. */ for (ri = 0; ri < md->pmd_npmc; ri++) { /* * Pick up the pmc pointer from hardware * state similar to the CSW_OUT code. */ pm = NULL; pcd = pmc_ri_to_classdep(md, ri, &adjri); (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm); if (pm == NULL || !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) continue; PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, pm, pm->pm_state); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] bad runcount ri %d rc %ld", __LINE__, ri, (unsigned long)counter_u64_fetch(pm->pm_runcount))); /* * Change desired state, and then stop if not * stalled. This two-step dance should avoid * race conditions where an interrupt re-enables * the PMC after this code has already checked * the pm_stalled flag. */ if (pm->pm_pcpu_state[cpu].pps_cpustate) { pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (!pm->pm_pcpu_state[cpu].pps_stalled) { (void) pcd->pcd_stop_pmc(cpu, adjri); if (PMC_TO_MODE(pm) == PMC_MODE_TC) { pcd->pcd_read_pmc(cpu, adjri, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin( pmc_mtxpool, pm); } } } counter_u64_add(pm->pm_runcount, -1); KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0, ("[pmc,%d] runcount is %d", __LINE__, ri)); (void) pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Inform the MD layer of this pseudo "context switch * out" */ (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); critical_exit(); /* ok to be pre-empted now */ /* * Unlink this process from the PMCs that are * targeting it. This will send a signal to * all PMC owner's whose PMCs are orphaned. * * Log PMC value at exit time if requested. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) pmclog_process_procexit(pm, pp); pmc_unlink_target_process(pm, pp); } free(pp, M_PMC); } else critical_exit(); /* pp == NULL */ /* * If the process owned PMCs, free them up and free up * memory. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } sx_xunlock(&pmc_sx); } /* * Handle a process fork. * * If the parent process 'p1' is under HWPMC monitoring, then copy * over any attached PMCs that have 'do_descendants' semantics. */ static void pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, int flags) { int is_using_hwpmcs; unsigned int ri; uint32_t do_descendants; struct pmc *pm; struct pmc_owner *po; struct pmc_process *ppnew, *ppold; (void) flags; /* unused parameter */ PROC_LOCK(p1); is_using_hwpmcs = p1->p_flag & P_HWPMC; PROC_UNLOCK(p1); /* * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs. */ epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); pmclog_process_proccreate(po, newproc, 1); } epoch_exit_preempt(global_epoch_preempt); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, p1->p_pid, p1->p_comm, newproc); /* * If the parent process (curthread->td_proc) is a * target of any PMCs, look for PMCs that are to be * inherited, and link these into the new process * descriptor. */ if ((ppold = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) goto done; /* nothing to do */ do_descendants = 0; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; if (do_descendants == 0) /* nothing to do */ goto done; /* * Now mark the new process as being tracked by this driver. */ PROC_LOCK(newproc); newproc->p_flag |= P_HWPMC; PROC_UNLOCK(newproc); /* allocate a descriptor for the new process */ if ((ppnew = pmc_find_process_descriptor(newproc, PMC_FLAG_ALLOCATE)) == NULL) goto done; /* * Run through all PMCs that were targeting the old process * and which specified F_DESCENDANTS and attach them to the * new process. * * Log the fork event to all owners of PMCs attached to this * process, if not already logged. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS)) { pmc_link_target_process(pm, ppnew); po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); } done: sx_xunlock(&pmc_sx); } static void pmc_process_threadcreate(struct thread *td) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_threadcreate(po, td, 1); epoch_exit_preempt(global_epoch_preempt); } static void pmc_process_threadexit(struct thread *td) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_threadexit(po, td); epoch_exit_preempt(global_epoch_preempt); } static void pmc_process_proccreate(struct proc *p) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_proccreate(po, p, 1 /* sync */); epoch_exit_preempt(global_epoch_preempt); } static void pmc_process_allproc(struct pmc *pm) { struct pmc_owner *po; struct thread *td; struct proc *p; po = pm->pm_owner; if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { pmclog_process_proccreate(po, p, 0 /* sync */); PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td) pmclog_process_threadcreate(po, td, 0 /* sync */); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); pmclog_flush(po, 0); } static void pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; /* * Notify owners of system sampling PMCs about KLD operations. */ epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); epoch_exit_preempt(global_epoch_preempt); /* * TODO: Notify owners of (all) process-sampling PMCs too. */ } static void pmc_kld_unload(void *arg __unused, const char *filename __unused, caddr_t address, size_t size) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); epoch_exit_preempt(global_epoch_preempt); /* * TODO: Notify owners of process-sampling PMCs. */ } /* * initialization */ static const char * pmc_name_of_pmcclass(enum pmc_class class) { switch (class) { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) \ case PMC_CLASS_##S: \ return #S; __PMC_CLASSES(); default: return (""); } } /* * Base class initializer: allocate structure and set default classes. */ struct pmc_mdep * pmc_mdep_alloc(int nclasses) { struct pmc_mdep *md; int n; /* SOFT + md classes */ n = 1 + nclasses; md = malloc(sizeof(struct pmc_mdep) + n * sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO); md->pmd_nclass = n; /* Add base class. */ pmc_soft_initialize(md); return md; } void pmc_mdep_free(struct pmc_mdep *md) { pmc_soft_finalize(md); free(md, M_PMC); } static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static struct pmc_mdep * pmc_generic_cpu_initialize(void) { struct pmc_mdep *md; md = pmc_mdep_alloc(0); md->pmd_cputype = PMC_CPU_GENERIC; md->pmd_pcpu_init = NULL; md->pmd_pcpu_fini = NULL; md->pmd_switch_in = generic_switch_in; md->pmd_switch_out = generic_switch_out; return (md); } static void pmc_generic_cpu_finalize(struct pmc_mdep *md) { (void) md; } static int pmc_initialize(void) { int c, cpu, error, n, ri; unsigned int maxcpu, domain; struct pcpu *pc; struct pmc_binding pb; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *sb; md = NULL; error = 0; pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK); pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK); pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK); pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK); pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK); pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK); #ifdef HWPMC_DEBUG /* parse debug flags first */ if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr))) pmc_debugflags_parse(pmc_debugstr, pmc_debugstr+strlen(pmc_debugstr)); #endif PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); /* check kernel version */ if (pmc_kernel_version != PMC_VERSION) { if (pmc_kernel_version == 0) printf("hwpmc: this kernel has not been compiled with " "'options HWPMC_HOOKS'.\n"); else printf("hwpmc: kernel version (0x%x) does not match " "module version (0x%x).\n", pmc_kernel_version, PMC_VERSION); return EPROGMISMATCH; } /* * check sysctl parameters */ if (pmc_hashsize <= 0) { (void) printf("hwpmc: tunable \"hashsize\"=%d must be " "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { (void) printf("hwpmc: tunable \"nsamples\"=%d out of " "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } if (pmc_callchaindepth <= 0 || pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " "range - using %d.\n", pmc_callchaindepth, PMC_CALLCHAIN_DEPTH_MAX); pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; } md = pmc_md_initialize(); if (md == NULL) { /* Default to generic CPU. */ md = pmc_generic_cpu_initialize(); if (md == NULL) return (ENOSYS); } KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); /* Compute the map from row-indices to classdep pointers. */ pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); for (n = 0; n < md->pmd_npmc; n++) pmc_rowindex_to_classdep[n] = NULL; for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; for (n = 0; n < pcd->pcd_num; n++, ri++) pmc_rowindex_to_classdep[ri] = pcd; } KASSERT(ri == md->pmd_npmc, ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, ri, md->pmd_npmc)); maxcpu = pmc_cpu_max(); /* allocate space for the per-cpu array */ pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, M_WAITOK|M_ZERO); /* per-cpu 'saved values' for managing process-mode PMCs */ pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK); /* Perform CPU-dependent initialization. */ pmc_save_cpu_binding(&pb); error = 0; for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pmc_select_cpu(cpu); pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, M_WAITOK|M_ZERO); if (md->pmd_pcpu_init) error = md->pmd_pcpu_init(md, cpu); for (n = 0; error == 0 && n < md->pmd_nclass; n++) error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); } pmc_restore_cpu_binding(&pb); if (error) return (error); /* allocate space for the sample array */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pc = pcpu_find(cpu); domain = pc->pc_domain; sb = malloc_domain(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; sb = malloc_domain(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; sb = malloc_domain(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb; } /* allocate space for the row disposition array */ pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); /* mark all PMCs as available */ for (n = 0; n < (int) md->pmd_npmc; n++) PMC_MARK_ROW_FREE(n); /* allocate thread hash tables */ pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, &pmc_ownerhashmask); pmc_processhash = hashinit(pmc_hashsize, M_PMC, &pmc_processhashmask); mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", MTX_SPIN); CK_LIST_INIT(&pmc_ss_owners); pmc_ss_count = 0; /* allocate a pool of spin mutexes */ pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, MTX_SPIN); PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, pmc_processhash, pmc_processhashmask); /* Initialize a spin mutex for the thread free list. */ mtx_init(&pmc_threadfreelist_mtx, "pmc-threadfreelist", "pmc-leaf", MTX_SPIN); /* * Initialize the callout to monitor the thread free list. * This callout will also handle the initial population of the list. */ taskqgroup_config_gtask_init(NULL, &free_gtask, pmc_thread_descriptor_pool_free_task, "thread descriptor pool free task"); /* register process {exit,fork,exec} handlers */ pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); /* register kld event handlers */ pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, NULL, EVENTHANDLER_PRI_ANY); pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, NULL, EVENTHANDLER_PRI_ANY); /* initialize logging */ pmclog_initialize(); /* set hook functions */ pmc_intr = md->pmd_intr; wmb(); pmc_hook = pmc_hook_handler; if (error == 0) { printf(PMC_MODULE_NAME ":"); for (n = 0; n < (int) md->pmd_nclass; n++) { pcd = &md->pmd_classdep[n]; printf(" %s/%d/%d/0x%b", pmc_name_of_pmcclass(pcd->pcd_class), pcd->pcd_num, pcd->pcd_width, pcd->pcd_caps, "\20" "\1INT\2USR\3SYS\4EDG\5THR" "\6REA\7WRI\10INV\11QUA\12PRC" "\13TAG\14CSC"); } printf("\n"); } return (error); } /* prepare to be unloaded */ static void pmc_cleanup(void) { int c, cpu; unsigned int maxcpu; struct pmc_ownerhash *ph; struct pmc_owner *po, *tmp; struct pmc_binding pb; #ifdef HWPMC_DEBUG struct pmc_processhash *prh; #endif PMCDBG0(MOD,INI,0, "cleanup"); /* switch off sampling */ CPU_FOREACH(cpu) DPCPU_ID_SET(cpu, pmc_sampled, 0); pmc_intr = NULL; sx_xlock(&pmc_sx); if (pmc_hook == NULL) { /* being unloaded already */ sx_xunlock(&pmc_sx); return; } pmc_hook = NULL; /* prevent new threads from entering module */ /* deregister event handlers */ EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); /* send SIGBUS to all owner threads, free up allocations */ if (pmc_ownerhash) for (ph = pmc_ownerhash; ph <= &pmc_ownerhash[pmc_ownerhashmask]; ph++) { LIST_FOREACH_SAFE(po, ph, po_next, tmp) { pmc_remove_owner(po); /* send SIGBUS to owner processes */ PMCDBG3(MOD,INI,2, "cleanup signal proc=%p " "(%d, %s)", po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); PROC_LOCK(po->po_owner); kern_psignal(po->po_owner, SIGBUS); PROC_UNLOCK(po->po_owner); pmc_destroy_owner_descriptor(po); } } /* reclaim allocated data structures */ mtx_destroy(&pmc_threadfreelist_mtx); pmc_thread_descriptor_pool_drain(); if (pmc_mtxpool) mtx_pool_destroy(&pmc_mtxpool); mtx_destroy(&pmc_processhash_mtx); taskqgroup_config_gtask_deinit(&free_gtask); if (pmc_processhash) { #ifdef HWPMC_DEBUG struct pmc_process *pp; PMCDBG0(MOD,INI,3, "destroy process hash"); for (prh = pmc_processhash; prh <= &pmc_processhash[pmc_processhashmask]; prh++) LIST_FOREACH(pp, prh, pp_next) PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); #endif hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); pmc_processhash = NULL; } if (pmc_ownerhash) { PMCDBG0(MOD,INI,3, "destroy owner hash"); hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); pmc_ownerhash = NULL; } KASSERT(CK_LIST_EMPTY(&pmc_ss_owners), ("[pmc,%d] Global SS owner list not empty", __LINE__)); KASSERT(pmc_ss_count == 0, ("[pmc,%d] Global SS count not empty", __LINE__)); /* do processor and pmc-class dependent cleanup */ maxcpu = pmc_cpu_max(); PMCDBG0(MOD,INI,3, "md cleanup"); if (md) { pmc_save_cpu_binding(&pb); for (cpu = 0; cpu < maxcpu; cpu++) { PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", cpu, pmc_pcpu[cpu]); if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) continue; pmc_select_cpu(cpu); for (c = 0; c < md->pmd_nclass; c++) md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); if (md->pmd_pcpu_fini) md->pmd_pcpu_fini(md, cpu); } if (md->pmd_cputype == PMC_CPU_GENERIC) pmc_generic_cpu_finalize(md); else pmc_md_finalize(md); pmc_mdep_free(md); md = NULL; pmc_restore_cpu_binding(&pb); } /* Free per-cpu descriptors. */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL, ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__, cpu)); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC); free_domain(pmc_pcpu[cpu], M_PMC); } free(pmc_pcpu, M_PMC); pmc_pcpu = NULL; free(pmc_pcpu_saved, M_PMC); pmc_pcpu_saved = NULL; if (pmc_pmcdisp) { free(pmc_pmcdisp, M_PMC); pmc_pmcdisp = NULL; } if (pmc_rowindex_to_classdep) { free(pmc_rowindex_to_classdep, M_PMC); pmc_rowindex_to_classdep = NULL; } pmclog_shutdown(); counter_u64_free(pmc_stats.pm_intr_ignored); counter_u64_free(pmc_stats.pm_intr_processed); counter_u64_free(pmc_stats.pm_intr_bufferfull); counter_u64_free(pmc_stats.pm_syscalls); counter_u64_free(pmc_stats.pm_syscall_errors); counter_u64_free(pmc_stats.pm_buffer_requests); counter_u64_free(pmc_stats.pm_buffer_requests_failed); counter_u64_free(pmc_stats.pm_log_sweeps); counter_u64_free(pmc_stats.pm_merges); counter_u64_free(pmc_stats.pm_overwrites); sx_xunlock(&pmc_sx); /* we are done */ } /* * The function called at load/unload. */ static int load (struct module *module __unused, int cmd, void *arg __unused) { int error; error = 0; switch (cmd) { case MOD_LOAD : /* initialize the subsystem */ error = pmc_initialize(); if (error != 0) break; PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d", pmc_syscall_num, pmc_cpu_max()); break; case MOD_UNLOAD : case MOD_SHUTDOWN: pmc_cleanup(); PMCDBG0(MOD,INI,1, "unloaded"); break; default : error = EINVAL; /* XXX should panic(9) */ break; } return error; } diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h index 35183bc8dba6..a1e24bc8d47b 100644 --- a/sys/sys/pmc.h +++ b/sys/sys/pmc.h @@ -1,1220 +1,1221 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #include #include #ifdef _KERNEL #include #include #endif #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ -#define PMC_VERSION_MAJOR 0x06 -#define PMC_VERSION_MINOR 0x02 +#define PMC_VERSION_MAJOR 0x07 +#define PMC_VERSION_MINOR 0x03 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) #define PMC_CPUID_LEN 64 /* cpu model name for pmu lookup */ extern char pmc_cpuid[PMC_CPUID_LEN]; /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. */ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_P5, 0x80, "Intel Pentium") \ __PMC_CPU(INTEL_P6, 0x81, "Intel Pentium Pro") \ __PMC_CPU(INTEL_CL, 0x82, "Intel Celeron") \ __PMC_CPU(INTEL_PII, 0x83, "Intel Pentium II") \ __PMC_CPU(INTEL_PIII, 0x84, "Intel Pentium III") \ __PMC_CPU(INTEL_PM, 0x85, "Intel Pentium M") \ __PMC_CPU(INTEL_PIV, 0x86, "Intel Pentium IV") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \ __PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \ __PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ __PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_GENERIC /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K7, 0x01, "AMD K7 performance counters") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(P5, 0x03, "Intel Pentium counters") \ __PMC_CLASS(P6, 0x04, "Intel Pentium Pro counters") \ __PMC_CLASS(P4, 0x05, "Intel Pentium-IV counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(MIPS24K, 0x0B, "MIPS 24K") \ __PMC_CLASS(OCTEON, 0x0C, "Cavium Octeon") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ __PMC_CLASS(E500, 0x13, "Freescale e500 class") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_E500 /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * a interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system. */ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparison sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_CASCADE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. */ #define PMC_F_UNUSED1 0x00000001 /* unused */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ #define PMC_F_USERCALLCHAIN 0x00000100 /*OP ALLOCATE use userspace stack */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +-----------------------+-------+-----------+ * | CPU | PMC MODE | CLASS | ROW INDEX | * +-----------------------+-------+-----------+ * * where CPU is 12 bits, MODE 8, CLASS 4, and ROW INDEX 8 Field 'CPU' * is set to the requested CPU for system-wide PMCs or PMC_CPU_ANY for * process-mode PMCs. Field 'PMC MODE' is the allocated PMC mode. * Field 'PMC CLASS' is the class of the PMC. Field 'ROW INDEX' is the * row index for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xFF000) >> 12) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFFF00000) >> 20) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFFF) << 20) | (((MODE) & 0xFF) << 12) | \ (((CLASS) & 0xF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ + pmc_value_t pm_count; /* initial/sample count */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ #ifdef _KERNEL struct pmc_driverstats { counter_u64_t pm_intr_ignored; /* #interrupts ignored */ counter_u64_t pm_intr_processed; /* #interrupts processed */ counter_u64_t pm_intr_bufferfull; /* #interrupts with ENOSPC */ counter_u64_t pm_syscalls; /* #syscalls */ counter_u64_t pm_syscall_errors; /* #syscalls with errors */ counter_u64_t pm_buffer_requests; /* #buffer requests */ counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */ counter_u64_t pm_log_sweeps; /* #sample buffer processing passes */ counter_u64_t pm_merges; /* merged k+u */ counter_u64_t pm_overwrites; /* UR overwrites */ }; #endif struct pmc_op_getdriverstats { unsigned int pm_intr_ignored; /* #interrupts ignored */ unsigned int pm_intr_processed; /* #interrupts processed */ unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ unsigned int pm_syscalls; /* #syscalls */ unsigned int pm_syscall_errors; /* #syscalls with errors */ unsigned int pm_buffer_requests; /* #buffer requests */ unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ unsigned int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address associated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. */ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 256 #define PMC_NLOGBUFFERS_PCPU 32 #define PMC_NSAMPLES 256 #define PMC_CALLCHAIN_DEPTH 128 #define PMC_THREADLIST_MAX 128 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) * (g) - global_epoch_preempt (epoch) * (p) - pmc_sx (sx) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific s1tuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * get descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc_pcpu_state { uint8_t pps_stalled; uint8_t pps_cpustate; } __aligned(CACHE_LINE_SIZE); struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; struct pmc_pcpu_state *pm_pcpu_state; volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ counter_u64_t pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ uint32_t pm_overflowcnt; /* count overflow interrupts */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC. */ pmc_id_t pm_id; /* allocated PMC id */ enum pmc_class pm_class; /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_threadpmcstate * * Record per-PMC, per-thread state. */ struct pmc_threadpmcstate { pmc_value_t pt_pmcval; /* per-thread reload count */ }; /* * struct pmc_thread * * Record a 'target' thread being profiled. */ struct pmc_thread { LIST_ENTRY(pmc_thread) pt_next; /* linked list */ struct thread *pt_td; /* target thread */ struct pmc_threadpmcstate pt_pmcs[]; /* per-PMC state */ }; /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ LIST_HEAD(,pmc_thread) pp_tds; /* list of threads */ struct mtx *pp_tdslock; /* lock on pp_tds thread list */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target process */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ CK_LIST_ENTRY(pmc_owner) po_ssnext; /* (g/p) list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ struct pmclog_buffer *po_curbuf[MAXCPU]; /* current log buffer */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). * */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint16_t ps_nsamples_actual; uint16_t ps_cpu; /* cpu number */ uint16_t ps_flags; /* other flags */ lwpid_t ps_tid; /* thread id */ pid_t ps_pid; /* process PID or -1 */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF) struct pmc_samplebuffer { struct pmc_sample * volatile ps_read; /* read pointer */ struct pmc_sample * volatile ps_write; /* write pointer */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample *ps_fence; /* one beyond ps_samples[] */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[3]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. */ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri); int (*pcd_stop_pmc)(int _cpu, int _ri); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* per-cpu initialization and finalization */ int (*pmd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pmd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(int _cpu, struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. */ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_driverstats pmc_stats; #if defined(HWPMC_DEBUG) #include /* debug flags, major flag groups */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... */ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _cpu, int _ring, struct pmc *_pm, struct trapframe *_tf, int _inuserspace); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); void pmc_flush_samples(int cpu); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */ diff --git a/sys/sys/pmclog.h b/sys/sys/pmclog.h index 9e37985d93b4..c0b645292cd6 100644 --- a/sys/sys/pmclog.h +++ b/sys/sys/pmclog.h @@ -1,319 +1,322 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMCLOG_H_ #define _SYS_PMCLOG_H_ #include enum pmclog_type { /* V1 ABI */ PMCLOG_TYPE_CLOSELOG = 1, PMCLOG_TYPE_DROPNOTIFY = 2, PMCLOG_TYPE_INITIALIZE = 3, PMCLOG_TYPE_MAPPINGCHANGE = 4, /* unused in v1 */ PMCLOG_TYPE_PMCALLOCATE = 5, PMCLOG_TYPE_PMCATTACH = 6, PMCLOG_TYPE_PMCDETACH = 7, PMCLOG_TYPE_PROCCSW = 8, PMCLOG_TYPE_PROCEXEC = 9, PMCLOG_TYPE_PROCEXIT = 10, PMCLOG_TYPE_PROCFORK = 11, PMCLOG_TYPE_SYSEXIT = 12, PMCLOG_TYPE_USERDATA = 13, /* * V2 ABI * * The MAP_{IN,OUT} event types obsolete the MAPPING_CHANGE * event type. The CALLCHAIN event type obsoletes the * PCSAMPLE event type. */ PMCLOG_TYPE_MAP_IN = 14, PMCLOG_TYPE_MAP_OUT = 15, PMCLOG_TYPE_CALLCHAIN = 16, /* * V3 ABI * * New variant of PMCLOG_TYPE_PMCALLOCATE for dynamic event. */ PMCLOG_TYPE_PMCALLOCATEDYN = 17, /* * V6 ABI */ PMCLOG_TYPE_THR_CREATE = 18, PMCLOG_TYPE_THR_EXIT = 19, PMCLOG_TYPE_PROC_CREATE = 20 }; /* * A log entry descriptor comprises of a 32 bit header and a 64 bit * time stamp followed by as many 32 bit words are required to record * the event. * * Header field format: * * 31 24 16 0 * +------------+------------+-----------------------------------+ * | MAGIC | TYPE | LENGTH | * +------------+------------+-----------------------------------+ * * MAGIC is the constant PMCLOG_HEADER_MAGIC. * TYPE contains a value of type enum pmclog_type. * LENGTH contains the length of the event record, in bytes. */ #define PMCLOG_ENTRY_HEADER \ uint32_t pl_header; \ uint32_t pl_ts_sec; \ uint32_t pl_ts_nsec; /* * The following structures are used to describe the size of each kind * of log entry to sizeof(). To keep the compiler from adding * padding, the fields of each structure are aligned to their natural * boundaries, and the structures are marked as 'packed'. * * The actual reading and writing of the log file is always in terms * of 4 byte quantities. */ struct pmclog_callchain { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uint32_t pl_tid; uint32_t pl_pmcid; uint32_t pl_cpuflags; uint32_t pl_cpuflags2; /* 8 byte aligned */ uintptr_t pl_pc[PMC_CALLCHAIN_DEPTH_MAX]; } __packed; #define PMC_CALLCHAIN_CPUFLAGS_TO_CPU(CF) (((CF) >> 16) & 0xFFFF) #define PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(CF) ((CF) & PMC_CC_F_USERSPACE) #define PMC_CALLCHAIN_TO_CPUFLAGS(CPU,FLAGS) \ (((CPU) << 16) | ((FLAGS) & 0xFFFF)) struct pmclog_closelog { PMCLOG_ENTRY_HEADER uint32_t pl_pad; }; struct pmclog_dropnotify { PMCLOG_ENTRY_HEADER uint32_t pl_pad; }; struct pmclog_initialize { PMCLOG_ENTRY_HEADER uint32_t pl_version; /* driver version */ uint32_t pl_cpu; /* enum pmc_cputype */ uint32_t pl_pad; char pl_cpuid[PMC_CPUID_LEN]; } __packed; struct pmclog_map_in { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uintfptr_t pl_start; /* 8 byte aligned */ char pl_pathname[PATH_MAX]; } __packed; struct pmclog_map_out { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uintfptr_t pl_start; /* 8 byte aligned */ uintfptr_t pl_end; } __packed; struct pmclog_pmcallocate { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_event; uint32_t pl_flags; + uint64_t pl_rate; } __packed; struct pmclog_pmcattach { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_pid; uint32_t pl_pad; char pl_pathname[PATH_MAX]; } __packed; struct pmclog_pmcdetach { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_pid; uint32_t pl_pad; } __packed; struct pmclog_proccsw { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint64_t pl_value; /* keep 8 byte aligned */ uint32_t pl_pid; uint32_t pl_tid; } __packed; struct pmclog_proccreate { PMCLOG_ENTRY_HEADER uint32_t pl_pid; + uint32_t pl_flags; + uint32_t pl_pad; uint64_t pl_pcomm[MAXCOMLEN+1]; /* keep 8 byte aligned */ } __packed; struct pmclog_procexec { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uint32_t pl_pmcid; uint32_t pl_pad; uintfptr_t pl_start; /* keep 8 byte aligned */ char pl_pathname[PATH_MAX]; } __packed; struct pmclog_procexit { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_pid; uint32_t pl_pad; uint64_t pl_value; /* keep 8 byte aligned */ } __packed; struct pmclog_procfork { PMCLOG_ENTRY_HEADER uint32_t pl_oldpid; uint32_t pl_newpid; uint32_t pl_pad; } __packed; struct pmclog_sysexit { PMCLOG_ENTRY_HEADER uint32_t pl_pid; } __packed; struct pmclog_threadcreate { PMCLOG_ENTRY_HEADER uint32_t pl_tid; uint32_t pl_pid; - uint32_t pl_pad; + uint32_t pl_flags; uint64_t pl_tdname[MAXCOMLEN+1]; /* keep 8 byte aligned */ } __packed; struct pmclog_threadexit { PMCLOG_ENTRY_HEADER uint32_t pl_tid; } __packed; struct pmclog_userdata { PMCLOG_ENTRY_HEADER uint32_t pl_userdata; } __packed; struct pmclog_pmcallocatedyn { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_event; uint32_t pl_flags; char pl_evname[PMC_NAME_MAX]; } __packed; union pmclog_entry { /* only used to size scratch areas */ struct pmclog_callchain pl_cc; struct pmclog_closelog pl_cl; struct pmclog_dropnotify pl_dn; struct pmclog_initialize pl_i; struct pmclog_map_in pl_mi; struct pmclog_map_out pl_mo; struct pmclog_pmcallocate pl_a; struct pmclog_pmcallocatedyn pl_ad; struct pmclog_pmcattach pl_t; struct pmclog_pmcdetach pl_d; struct pmclog_proccsw pl_c; struct pmclog_procexec pl_x; struct pmclog_procexit pl_e; struct pmclog_procfork pl_f; struct pmclog_sysexit pl_se; struct pmclog_userdata pl_u; }; #define PMCLOG_HEADER_MAGIC 0xEEU #define PMCLOG_HEADER_TO_LENGTH(H) \ ((H) & 0x0000FFFF) #define PMCLOG_HEADER_TO_TYPE(H) \ (((H) & 0x00FF0000) >> 16) #define PMCLOG_HEADER_TO_MAGIC(H) \ (((H) & 0xFF000000) >> 24) #define PMCLOG_HEADER_CHECK_MAGIC(H) \ (PMCLOG_HEADER_TO_MAGIC(H) == PMCLOG_HEADER_MAGIC) #ifdef _KERNEL /* * Prototypes */ int pmclog_configure_log(struct pmc_mdep *_md, struct pmc_owner *_po, int _logfd); int pmclog_deconfigure_log(struct pmc_owner *_po); int pmclog_flush(struct pmc_owner *_po, int force); int pmclog_close(struct pmc_owner *_po); void pmclog_initialize(void); int pmclog_proc_create(struct thread *td, void **handlep); void pmclog_proc_ignite(void *handle, struct pmc_owner *po); void pmclog_process_callchain(struct pmc *_pm, struct pmc_sample *_ps); void pmclog_process_closelog(struct pmc_owner *po); void pmclog_process_dropnotify(struct pmc_owner *po); void pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start, const char *path); void pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start, uintfptr_t end); void pmclog_process_pmcallocate(struct pmc *_pm); void pmclog_process_pmcattach(struct pmc *_pm, pid_t _pid, char *_path); void pmclog_process_pmcdetach(struct pmc *_pm, pid_t _pid); void pmclog_process_proccsw(struct pmc *_pm, struct pmc_process *_pp, pmc_value_t _v, struct thread *); void pmclog_process_procexec(struct pmc_owner *_po, pmc_id_t _pmid, pid_t _pid, uintfptr_t _startaddr, char *_path); void pmclog_process_procexit(struct pmc *_pm, struct pmc_process *_pp); void pmclog_process_procfork(struct pmc_owner *_po, pid_t _oldpid, pid_t _newpid); void pmclog_process_sysexit(struct pmc_owner *_po, pid_t _pid); void pmclog_process_threadcreate(struct pmc_owner *_po, struct thread *td, int sync); void pmclog_process_threadexit(struct pmc_owner *_po, struct thread *td); void pmclog_process_proccreate(struct pmc_owner *_po, struct proc *p, int sync); int pmclog_process_userlog(struct pmc_owner *_po, struct pmc_op_writelog *_wl); void pmclog_shutdown(void); #endif /* _KERNEL */ #endif /* _SYS_PMCLOG_H_ */ diff --git a/usr.sbin/pmc/Makefile b/usr.sbin/pmc/Makefile index 481670118b30..21d727e63aeb 100644 --- a/usr.sbin/pmc/Makefile +++ b/usr.sbin/pmc/Makefile @@ -1,15 +1,16 @@ # # $FreeBSD$ # .include PROG_CXX= pmc MAN= -CXXFLAGS+= -O0 +CXXFLAGS+= LIBADD= kvm pmc m ncursesw pmcstat elf SRCS= pmc.c pmc_util.c cmd_pmc_stat.c \ - cmd_pmc_list.c cmd_pmc_filter.cc + cmd_pmc_list.c cmd_pmc_filter.cc \ + cmd_pmc_summary.cc .include diff --git a/usr.sbin/pmc/cmd_pmc.h b/usr.sbin/pmc/cmd_pmc.h index 46a253cc8713..cdec055a2036 100644 --- a/usr.sbin/pmc/cmd_pmc.h +++ b/usr.sbin/pmc/cmd_pmc.h @@ -1,59 +1,60 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef _CMD_PMC_H_ #define _CMD_PMC_H_ #define DEFAULT_DISPLAY_HEIGHT 256 /* file virtual height */ #define DEFAULT_DISPLAY_WIDTH 1024 /* file virtual width */ extern int pmc_displayheight; extern int pmc_displaywidth; extern int pmc_kq; extern struct pmcstat_args pmc_args; typedef int (*cmd_disp_t)(int, char **); #if defined(__cplusplus) extern "C" { #endif int cmd_pmc_stat(int, char **); int cmd_pmc_filter(int, char **); int cmd_pmc_stat_system(int, char **); int cmd_pmc_list_events(int, char **); + int cmd_pmc_summary(int, char **); #if defined(__cplusplus) }; #endif int pmc_util_get_pid(struct pmcstat_args *); void pmc_util_start_pmcs(struct pmcstat_args *); void pmc_util_cleanup(struct pmcstat_args *); void pmc_util_shutdown_logging(struct pmcstat_args *args); void pmc_util_kill_process(struct pmcstat_args *args); #endif diff --git a/usr.sbin/pmc/cmd_pmc_filter.cc b/usr.sbin/pmc/cmd_pmc_filter.cc index b35bb2bee3bd..3892066c7695 100644 --- a/usr.sbin/pmc/cmd_pmc_filter.cc +++ b/usr.sbin/pmc/cmd_pmc_filter.cc @@ -1,346 +1,341 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cmd_pmc.h" #include #include -#if _LIBCPP_STD_VER >= 11 #include + using std::unordered_map; -#else -#include -using std::tr1::unordered_map; -#endif +typedef unordered_map idmap; +typedef std::pair identry; + #define LIST_MAX 64 static struct option longopts[] = { {"lwps", required_argument, NULL, 't'}, {"pids", required_argument, NULL, 'p'}, {"threads", required_argument, NULL, 'T'}, {"processes", required_argument, NULL, 'P'}, {"events", required_argument, NULL, 'e'}, {NULL, 0, NULL, 0} }; static void usage(void) { errx(EX_USAGE, "\t filter log file\n" "\t -e , --events -- comma-delimited list of events to filter on\n" "\t -p , --pids -- comma-delimited list of pids to filter on\n" "\t -P , --processes -- comma-delimited list of process names to filter on\n" "\t -t , --lwps -- comma-delimited list of lwps to filter on\n" "\t -T , --threads -- comma-delimited list of thread names to filter on\n" "\t -x -- toggle inclusive filtering\n" ); } static void parse_intlist(char *strlist, uint32_t *intlist, int *pcount, int (*fn) (const char *)) { char *token; int count, tokenval; count = 0; while ((token = strsep(&strlist, ",")) != NULL && count < LIST_MAX) { if ((tokenval = fn(token)) < 0) errx(EX_USAGE, "ERROR: %s not usable value", token); intlist[count++] = tokenval; } *pcount = count; } static void parse_events(char *strlist, uint32_t intlist[LIST_MAX], int *pcount, char *cpuid) { char *token; int count, tokenval; count = 0; while ((token = strsep(&strlist, ",")) != NULL && count < LIST_MAX) { if ((tokenval = pmc_pmu_idx_get_by_event(cpuid, token)) < 0) errx(EX_USAGE, "ERROR: %s not usable value", token); intlist[count++] = tokenval; } *pcount = count; } static void parse_names(char *strlist, char *namelist[LIST_MAX], int *pcount) { char *token; int count; count = 0; while ((token = strsep(&strlist, ",")) != NULL && count < LIST_MAX) { namelist[count++] = token; } *pcount = count; } struct pmcid_ent { uint32_t pe_pmcid; uint32_t pe_idx; }; #define _PMCLOG_TO_HEADER(T,L) \ ((PMCLOG_HEADER_MAGIC << 24) | \ (PMCLOG_TYPE_ ## T << 16) | \ ((L) & 0xFFFF)) - -typedef unordered_map < int ,std::string > idmap; -typedef std::pair < int ,std::string > identry; - static bool pmc_find_name(idmap & map, uint32_t id, char *list[LIST_MAX], int count) { int i; auto kvpair = map.find(id); if (kvpair == map.end()) { printf("unknown id: %d\n", id); return (false); } auto p = list; for (i = 0; i < count; i++, p++) { if (strstr(kvpair->second.c_str(), *p) != NULL) return (true); } return (false); } static void pmc_filter_handler(uint32_t *lwplist, int lwpcount, uint32_t *pidlist, int pidcount, char *events, char *processes, char *threads, bool exclusive, int infd, int outfd) { struct pmclog_ev ev; struct pmclog_parse_state *ps; struct pmcid_ent *pe; uint32_t eventlist[LIST_MAX]; char cpuid[PMC_CPUID_LEN]; char *proclist[LIST_MAX]; char *threadlist[LIST_MAX]; int i, pmccount, copies, eventcount, proccount, threadcount; uint32_t idx; idmap pidmap, tidmap; if ((ps = static_cast < struct pmclog_parse_state *>(pmclog_open(infd)))== NULL) errx(EX_OSERR, "ERROR: Cannot allocate pmclog parse state: %s\n", strerror(errno)); proccount = eventcount = pmccount = 0; if (processes) parse_names(processes, proclist, &proccount); if (threads) parse_names(threads, threadlist, &threadcount); while (pmclog_read(ps, &ev) == 0) { if (ev.pl_type == PMCLOG_TYPE_INITIALIZE) memcpy(cpuid, ev.pl_u.pl_i.pl_cpuid, PMC_CPUID_LEN); if (ev.pl_type == PMCLOG_TYPE_PMCALLOCATE) pmccount++; } if (events) parse_events(events, eventlist, &eventcount, cpuid); lseek(infd, 0, SEEK_SET); pmclog_close(ps); if ((ps = static_cast < struct pmclog_parse_state *>(pmclog_open(infd)))== NULL) errx(EX_OSERR, "ERROR: Cannot allocate pmclog parse state: %s\n", strerror(errno)); if ((pe = (typeof(pe)) malloc(sizeof(*pe) * pmccount)) == NULL) errx(EX_OSERR, "ERROR: failed to allocate pmcid map"); i = 0; while (pmclog_read(ps, &ev) == 0 && i < pmccount) { if (ev.pl_type == PMCLOG_TYPE_PMCALLOCATE) { pe[i].pe_pmcid = ev.pl_u.pl_a.pl_pmcid; pe[i].pe_idx = ev.pl_u.pl_a.pl_event; i++; } } lseek(infd, 0, SEEK_SET); pmclog_close(ps); if ((ps = static_cast < struct pmclog_parse_state *>(pmclog_open(infd)))== NULL) errx(EX_OSERR, "ERROR: Cannot allocate pmclog parse state: %s\n", strerror(errno)); copies = 0; while (pmclog_read(ps, &ev) == 0) { if (ev.pl_type == PMCLOG_TYPE_THR_CREATE) - tidmap.insert(identry(ev.pl_u.pl_tc.pl_tid, ev.pl_u.pl_tc.pl_tdname)); + tidmap[ev.pl_u.pl_tc.pl_tid] = ev.pl_u.pl_tc.pl_tdname; if (ev.pl_type == PMCLOG_TYPE_PROC_CREATE) - pidmap.insert(identry(ev.pl_u.pl_pc.pl_pid, ev.pl_u.pl_pc.pl_pcomm)); + pidmap[ev.pl_u.pl_pc.pl_pid] = ev.pl_u.pl_pc.pl_pcomm; if (ev.pl_type != PMCLOG_TYPE_CALLCHAIN) { if (write(outfd, ev.pl_data, ev.pl_len) != (ssize_t)ev.pl_len) errx(EX_OSERR, "ERROR: failed output write"); continue; } if (pidcount) { for (i = 0; i < pidcount; i++) if (pidlist[i] == ev.pl_u.pl_cc.pl_pid) break; if ((i == pidcount) == exclusive) continue; } if (lwpcount) { for (i = 0; i < lwpcount; i++) if (lwplist[i] == ev.pl_u.pl_cc.pl_tid) break; if ((i == lwpcount) == exclusive) continue; } if (eventcount) { for (i = 0; i < pmccount; i++) { if (pe[i].pe_pmcid == ev.pl_u.pl_cc.pl_pmcid) break; } if (i == pmccount) errx(EX_USAGE, "ERROR: unallocated pmcid: %d\n", ev.pl_u.pl_cc.pl_pmcid); idx = pe[i].pe_idx; for (i = 0; i < eventcount; i++) { if (idx == eventlist[i]) break; } if ((i == eventcount) == exclusive) continue; } if (proccount && pmc_find_name(pidmap, ev.pl_u.pl_cc.pl_pid, proclist, proccount) == exclusive) continue; if (threadcount && pmc_find_name(tidmap, ev.pl_u.pl_cc.pl_tid, threadlist, threadcount) == exclusive) continue; if (write(outfd, ev.pl_data, ev.pl_len) != (ssize_t)ev.pl_len) errx(EX_OSERR, "ERROR: failed output write"); } } int cmd_pmc_filter(int argc, char **argv) { char *lwps, *pids, *events, *processes, *threads; uint32_t lwplist[LIST_MAX]; uint32_t pidlist[LIST_MAX]; int option, lwpcount, pidcount; int prelogfd, postlogfd; bool exclusive; threads = processes = lwps = pids = events = NULL; lwpcount = pidcount = 0; exclusive = false; while ((option = getopt_long(argc, argv, "e:p:t:xP:T:", longopts, NULL)) != -1) { switch (option) { case 'e': events = strdup(optarg); break; case 'p': pids = strdup(optarg); break; case 'P': processes = strdup(optarg); break; case 't': lwps = strdup(optarg); break; case 'T': threads = strdup(optarg); break; case 'x': exclusive = !exclusive; break; case '?': default: usage(); } } argc -= optind; argv += optind; if (argc != 2) usage(); if (lwps) parse_intlist(lwps, lwplist, &lwpcount, atoi); if (pids) parse_intlist(pids, pidlist, &pidcount, atoi); if ((prelogfd = open(argv[0], O_RDONLY, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) errx(EX_OSERR, "ERROR: Cannot open \"%s\" for reading: %s.", argv[0], strerror(errno)); if ((postlogfd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) errx(EX_OSERR, "ERROR: Cannot open \"%s\" for writing: %s.", argv[1], strerror(errno)); pmc_filter_handler(lwplist, lwpcount, pidlist, pidcount, events, processes, threads, exclusive, prelogfd, postlogfd); return (0); } diff --git a/usr.sbin/pmc/cmd_pmc_stat.c b/usr.sbin/pmc/cmd_pmc_stat.c index 57ab6b9a54a2..7ea54c08681f 100644 --- a/usr.sbin/pmc/cmd_pmc_stat.c +++ b/usr.sbin/pmc/cmd_pmc_stat.c @@ -1,487 +1,487 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cmd_pmc.h" /* * Return the frequency of the kernel's statistics clock. */ static int getstathz(void) { int mib[2]; size_t size; struct clockinfo clockrate; mib[0] = CTL_KERN; mib[1] = KERN_CLOCKRATE; size = sizeof clockrate; if (sysctl(mib, 2, &clockrate, &size, NULL, 0) == -1) err(1, "sysctl kern.clockrate"); return clockrate.stathz; } #define STAT_MODE_NPMCS 6 #define FIXED_MODE_NPMCS 2 static struct timespec before_ts; #define CYCLES 0 #define INST 1 #define BR 2 #define IAP_START BR #define BR_MISS 3 #define CACHE 4 #define CACHE_MISS 5 static const char *pmc_stat_mode_names[] = { "cycles", "instructions", "branches", "branch-misses", "cache-references", "cache-misses", }; static int pmcstat_sockpair[NSOCKPAIRFD]; static void usage(void) { errx(EX_USAGE, "\t get basic stats from command line program\n" "\t -j , --events comma-delimited list of event specifiers\n" ); } static void showtime(FILE *out, struct timespec *before, struct timespec *after, struct rusage *ru) { char decimal_point; uint64_t real, user, sys; (void)setlocale(LC_NUMERIC, ""); decimal_point = localeconv()->decimal_point[0]; after->tv_sec -= before->tv_sec; after->tv_nsec -= before->tv_nsec; if (after->tv_nsec < 0) after->tv_sec--, after->tv_nsec += 1000000000; real = (after->tv_sec * 1000000000 + after->tv_nsec) / 1000; user = ru->ru_utime.tv_sec * 1000000 + ru->ru_utime.tv_usec; sys = ru->ru_stime.tv_sec * 1000000 + ru->ru_stime.tv_usec; fprintf(out, "%13jd%c%02ld real\t\t\t#\t%2.02f%% cpu\n", (intmax_t)after->tv_sec, decimal_point, after->tv_nsec / 10000000, 100 * (double)(sys + user + 1) / (double)(real + 1)); fprintf(out, "%13jd%c%02ld user\t\t\t#\t%2.2f%% cpu\n", (intmax_t)ru->ru_utime.tv_sec, decimal_point, ru->ru_utime.tv_usec / 10000, 100 * (double)(user + 1) / (double)(real + 1)); fprintf(out, "%13jd%c%02ld sys\t\t\t#\t%2.02f%% cpu\n", (intmax_t)ru->ru_stime.tv_sec, decimal_point, ru->ru_stime.tv_usec / 10000, 100 * (double)(sys + 1) / (double)(real + 1)); } static const char *stat_mode_cntrs[STAT_MODE_NPMCS]; static const char *stat_mode_names[STAT_MODE_NPMCS]; static void pmc_stat_setup_stat(int system_mode, const char *arg) { const char *new_cntrs[STAT_MODE_NPMCS]; static const char **pmc_stat_mode_cntrs; struct pmcstat_ev *ev; char *counters, *counter; int i, c, start, newcnt; cpuset_t cpumask, rootmask; if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, sizeof(rootmask), &rootmask) == -1) err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs"); CPU_COPY(&rootmask, &cpumask); if (pmc_pmu_stat_mode(&pmc_stat_mode_cntrs) != 0) errx(EX_USAGE, "ERROR: hwmpc.ko not loaded or stat not supported on host."); if (system_mode && geteuid() != 0) errx(EX_USAGE, "ERROR: system mode counters can only be used as root"); counters = NULL; for (i = 0; i < STAT_MODE_NPMCS; i++) { stat_mode_cntrs[i] = pmc_stat_mode_cntrs[i]; stat_mode_names[i] = pmc_stat_mode_names[i]; } if (arg) { counters = strdup(arg); newcnt = 0; while ((counter = strsep(&counters, ",")) != NULL && newcnt < STAT_MODE_NPMCS - IAP_START) { new_cntrs[newcnt++] = counter; if (pmc_pmu_sample_rate_get(counter) == DEFAULT_SAMPLE_COUNT) errx(EX_USAGE, "ERROR: %s not recognized on host", counter); } start = IAP_START + STAT_MODE_NPMCS - FIXED_MODE_NPMCS - newcnt; for (i = 0; i < newcnt; i++) { stat_mode_cntrs[start + i] = new_cntrs[i]; stat_mode_names[start + i] = new_cntrs[i]; } } if (system_mode) pmc_args.pa_flags |= FLAG_HAS_SYSTEM_PMCS; else pmc_args.pa_flags |= FLAG_HAS_PROCESS_PMCS; pmc_args.pa_flags |= FLAG_HAS_COUNTING_PMCS; pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET; pmc_args.pa_flags |= FLAG_HAS_PIPE; pmc_args.pa_required |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET | FLAG_HAS_OUTPUT_LOGFILE; pmc_args.pa_outputpath = strdup("/dev/null"); pmc_args.pa_logfd = pmcstat_open_log(pmc_args.pa_outputpath, PMCSTAT_OPEN_FOR_WRITE); for (i = 0; i < STAT_MODE_NPMCS; i++) { if ((ev = malloc(sizeof(*ev))) == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); if (system_mode) ev->ev_mode = PMC_MODE_SC; else ev->ev_mode = PMC_MODE_TC; ev->ev_spec = strdup(stat_mode_cntrs[i]); if (ev->ev_spec == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); c = strcspn(strdup(stat_mode_cntrs[i]), ", \t"); ev->ev_name = malloc(c + 1); if (ev->ev_name == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); (void)strncpy(ev->ev_name, stat_mode_cntrs[i], c); *(ev->ev_name + c) = '\0'; ev->ev_count = -1; ev->ev_flags = 0; ev->ev_flags |= PMC_F_DESCENDANTS; ev->ev_cumulative = 1; ev->ev_saved = 0LL; ev->ev_pmcid = PMC_ID_INVALID; STAILQ_INSERT_TAIL(&pmc_args.pa_events, ev, ev_next); if (system_mode) { ev->ev_cpu = CPU_FFS(&cpumask) - 1; CPU_CLR(ev->ev_cpu, &cpumask); pmcstat_clone_event_descriptor(ev, &cpumask, &pmc_args); CPU_SET(ev->ev_cpu, &cpumask); } else ev->ev_cpu = PMC_CPU_ANY; } if (clock_gettime(CLOCK_MONOTONIC, &before_ts)) err(1, "clock_gettime"); } static void pmc_stat_print_stat(struct rusage *ru) { struct pmcstat_ev *ev; struct timespec after; uint64_t cvals[STAT_MODE_NPMCS]; uint64_t ticks, value; int hz, i; if (ru) { hz = getstathz(); ticks = hz * (ru->ru_utime.tv_sec + ru->ru_stime.tv_sec) + hz * (ru->ru_utime.tv_usec + ru->ru_stime.tv_usec) / 1000000; if (clock_gettime(CLOCK_MONOTONIC, &after)) err(1, "clock_gettime"); /* * If our round-off on the tick calculation still puts us at 0, * then always assume at least one tick. */ if (ticks == 0) ticks = 1; fprintf(pmc_args.pa_printfile, "%16ld %s\t\t#\t%02.03f M/sec\n", ru->ru_minflt, "page faults", ((double)ru->ru_minflt / (double)ticks) / hz); fprintf(pmc_args.pa_printfile, "%16ld %s\t\t#\t%02.03f M/sec\n", ru->ru_nvcsw, "voluntary csw", ((double)ru->ru_nvcsw / (double)ticks) / hz); fprintf(pmc_args.pa_printfile, "%16ld %s\t#\t%02.03f M/sec\n", ru->ru_nivcsw, "involuntary csw", ((double)ru->ru_nivcsw / (double)ticks) / hz); } bzero(&cvals, sizeof(cvals)); STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) { if (pmc_read(ev->ev_pmcid, &value) < 0) err(EX_OSERR, "ERROR: Cannot read pmc \"%s\"", ev->ev_name); for (i = 0; i < STAT_MODE_NPMCS; i++) if (strcmp(ev->ev_name, stat_mode_cntrs[i]) == 0) cvals[i] += value; } fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[CYCLES], stat_mode_names[CYCLES]); fprintf(pmc_args.pa_printfile, "%16jd %s\t\t#\t%01.03f inst/cycle\n", (uintmax_t)cvals[INST], stat_mode_names[INST], (double)cvals[INST] / cvals[CYCLES]); fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[BR], stat_mode_names[BR]); if (stat_mode_names[BR_MISS] == pmc_stat_mode_names[BR_MISS]) fprintf(pmc_args.pa_printfile, "%16jd %s\t\t#\t%.03f%%\n", (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS], 100 * ((double)cvals[BR_MISS] / cvals[BR])); else fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS]); fprintf(pmc_args.pa_printfile, "%16jd %s%s", (uintmax_t)cvals[CACHE], stat_mode_names[CACHE], stat_mode_names[CACHE] != pmc_stat_mode_names[CACHE] ? "\n" : ""); if (stat_mode_names[CACHE] == pmc_stat_mode_names[CACHE]) fprintf(pmc_args.pa_printfile, "\t#\t%.03f refs/inst\n", ((double)cvals[CACHE] / cvals[INST])); fprintf(pmc_args.pa_printfile, "%16jd %s%s", (uintmax_t)cvals[CACHE_MISS], stat_mode_names[CACHE_MISS], stat_mode_names[CACHE_MISS] != pmc_stat_mode_names[CACHE_MISS] ? "\n" : ""); if (stat_mode_names[CACHE_MISS] == pmc_stat_mode_names[CACHE_MISS]) fprintf(pmc_args.pa_printfile, "\t\t#\t%.03f%%\n", 100 * ((double)cvals[CACHE_MISS] / cvals[CACHE])); if (ru) showtime(pmc_args.pa_printfile, &before_ts, &after, ru); } static struct option longopts[] = { {"events", required_argument, NULL, 'j'}, {NULL, 0, NULL, 0} }; static int pmc_stat_internal(int argc, char **argv, int system_mode) { char *event, *r; struct sigaction sa; struct kevent kev; struct rusage ru; struct winsize ws; struct pmcstat_ev *ev; int c, option, runstate; int waitstatus, ru_valid, do_debug; do_debug = ru_valid = 0; r = event = NULL; while ((option = getopt_long(argc, argv, "dj:", longopts, NULL)) != -1) { switch (option) { case 'j': r = event = strdup(optarg); break; case 'd': do_debug = 1; break; case '?': default: usage(); } } pmc_args.pa_argc = (argc -= optind); pmc_args.pa_argv = (argv += optind); if (argc == 0) usage(); pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE; pmc_stat_setup_stat(system_mode, event); free(r); bzero(&ru, sizeof(ru)); EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT"); EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO"); EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, 1000, NULL); if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for timer"); STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) { if (pmc_allocate(ev->ev_spec, ev->ev_mode, - ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0) + ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid, ev->ev_count) < 0) err(EX_OSERR, "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"", PMC_IS_SYSTEM_MODE(ev->ev_mode) ? "system" : "process", ev->ev_spec); if (PMC_IS_SAMPLING_MODE(ev->ev_mode) && pmc_set(ev->ev_pmcid, ev->ev_count) < 0) err(EX_OSERR, "ERROR: Cannot set sampling count for PMC \"%s\"", ev->ev_name); } /* * An exec() failure of a forked child is signalled by the * child sending the parent a SIGCHLD. We don't register an * actual signal handler for SIGCHLD, but instead use our * kqueue to pick up the signal. */ EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD"); pmcstat_create_process(pmcstat_sockpair, &pmc_args, pmc_kq); if (SLIST_EMPTY(&pmc_args.pa_targets)) errx(EX_DATAERR, "ERROR: No matching target processes."); if (pmc_args.pa_flags & FLAG_HAS_PROCESS_PMCS) pmcstat_attach_pmcs(&pmc_args); /* start the pmcs */ pmc_util_start_pmcs(&pmc_args); /* start the (commandline) process if needed */ pmcstat_start_process(pmcstat_sockpair); /* Handle SIGINT using the kqueue loop */ sa.sa_handler = SIG_IGN; sa.sa_flags = 0; (void)sigemptyset(&sa.sa_mask); if (sigaction(SIGINT, &sa, NULL) < 0) err(EX_OSERR, "ERROR: Cannot install signal handler"); /* * loop till either the target process (if any) exits, or we * are killed by a SIGINT or we reached the time duration. */ runstate = PMCSTAT_RUNNING; do { if ((c = kevent(pmc_kq, NULL, 0, &kev, 1, NULL)) <= 0) { if (errno != EINTR) err(EX_OSERR, "ERROR: kevent failed"); else continue; } if (kev.flags & EV_ERROR) errc(EX_OSERR, kev.data, "ERROR: kevent failed"); switch (kev.filter) { case EVFILT_PROC: /* target has exited */ if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) { getrusage(RUSAGE_CHILDREN, &ru); ru_valid = 1; } break; case EVFILT_READ: /* log file data is present */ break; case EVFILT_TIMER: if (do_debug) pmc_stat_print_stat(NULL); break; case EVFILT_SIGNAL: if (kev.ident == SIGCHLD) { /* * The child process sends us a * SIGCHLD if its exec() failed. We * wait for it to exit and then exit * ourselves. */ (void)wait(&c); runstate = PMCSTAT_FINISHED; } else if (kev.ident == SIGIO) { /* * We get a SIGIO if a PMC loses all * of its targets, or if logfile * writes encounter an error. */ if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) { getrusage(RUSAGE_CHILDREN, &ru); ru_valid = 1; } runstate = pmcstat_close_log(&pmc_args); } else if (kev.ident == SIGINT) { /* Kill the child process if we started it */ if (pmc_args.pa_flags & FLAG_HAS_COMMANDLINE) pmc_util_kill_process(&pmc_args); runstate = pmcstat_close_log(&pmc_args); } else if (kev.ident == SIGWINCH) { if (ioctl(fileno(pmc_args.pa_printfile), TIOCGWINSZ, &ws) < 0) err(EX_OSERR, "ERROR: Cannot determine window size"); pmc_displayheight = ws.ws_row - 1; pmc_displaywidth = ws.ws_col - 1; } else assert(0); break; } } while (runstate != PMCSTAT_FINISHED); if (!ru_valid) warnx("couldn't get rusage"); pmc_stat_print_stat(&ru); pmc_util_cleanup(&pmc_args); return (0); } int cmd_pmc_stat(int argc, char **argv) { return (pmc_stat_internal(argc, argv, 0)); } int cmd_pmc_stat_system(int argc, char **argv) { return (pmc_stat_internal(argc, argv, 1)); } diff --git a/usr.sbin/pmc/cmd_pmc_summary.cc b/usr.sbin/pmc/cmd_pmc_summary.cc new file mode 100644 index 000000000000..ea2ca6a4c7f5 --- /dev/null +++ b/usr.sbin/pmc/cmd_pmc_summary.cc @@ -0,0 +1,220 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018, Matthew Macy + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "cmd_pmc.h" + +#include +#include +#include +#include + +using std::unordered_map; +typedef unordered_map idmap; +typedef unordered_map intmap; +typedef unordered_map strintmap; +typedef std::pair sampleid; +typedef std::pair samplename; +typedef unordered_map > eventcountmap; + +#define P_KPROC 0x00004 /* Kernel process. */ + +static void +usage(void) +{ + errx(EX_USAGE, + "\t summarize log file\n" + "\t -k , --topk show topk processes for each counter\n" + ); +} + +static int +pmc_summary_handler(int logfd, int k, bool do_full) +{ + struct pmclog_parse_state *ps; + struct pmclog_ev ev; + idmap pidmap, tidmap, eventnamemap; + strintmap tideventmap, pideventmap; + intmap eventmap, pmcidmap, ratemap; + intmap kerntidmap, kernpidmap; + eventcountmap countmap; + + ps = static_cast(pmclog_open(logfd)); + if (ps == NULL) + errx(EX_OSERR, "ERROR: Cannot allocate pmclog parse state: %s\n", + strerror(errno)); + while (pmclog_read(ps, &ev) == 0) { + if (ev.pl_type == PMCLOG_TYPE_PMCALLOCATE) { + pmcidmap[ev.pl_u.pl_a.pl_pmcid] = ev.pl_u.pl_a.pl_event; + ratemap[ev.pl_u.pl_a.pl_event] = ev.pl_u.pl_a.pl_rate; + eventnamemap[ev.pl_u.pl_a.pl_event] = ev.pl_u.pl_a.pl_evname; + } + if (ev.pl_type == PMCLOG_TYPE_THR_CREATE) { + tidmap[ev.pl_u.pl_tc.pl_tid] = ev.pl_u.pl_tc.pl_tdname; + kerntidmap[ev.pl_u.pl_tc.pl_tid] = !!(ev.pl_u.pl_tc.pl_flags & P_KPROC); + if (tideventmap.find(ev.pl_u.pl_tc.pl_tdname) == tideventmap.end()) + tideventmap[ev.pl_u.pl_tc.pl_tdname] = intmap(); + } + if (ev.pl_type == PMCLOG_TYPE_PROC_CREATE) { + pidmap[ev.pl_u.pl_pc.pl_pid] = ev.pl_u.pl_pc.pl_pcomm; + kernpidmap[ev.pl_u.pl_pc.pl_pid] = !!(ev.pl_u.pl_pc.pl_flags & P_KPROC); + if (pideventmap.find(ev.pl_u.pl_pc.pl_pcomm) == pideventmap.end()) + pideventmap[ev.pl_u.pl_pc.pl_pcomm] = intmap(); + } + if (ev.pl_type == PMCLOG_TYPE_CALLCHAIN) { + auto event = pmcidmap[ev.pl_u.pl_cc.pl_pmcid]; + + if (event == 0) + continue; + eventmap[event]++; + auto tidname = tidmap.find(ev.pl_u.pl_cc.pl_tid); + auto pidname = pidmap.find(ev.pl_u.pl_cc.pl_pid); + if (tidname != tidmap.end()) { + auto &teventmap = tideventmap[tidname->second]; + teventmap[event]++; + } + if (pidname != pidmap.end()) { + auto &peventmap = pideventmap[pidname->second]; + peventmap[event]++; + } + } + } + for (auto &pkv : pideventmap) + for (auto &ekv : pkv.second) { + auto &samplevec = countmap[ekv.first]; + samplevec.emplace_back(ekv.second, pkv.first); + } + for (auto &kv : countmap) + std::sort(kv.second.begin(), kv.second.end(), [](auto &a, auto &b) {return (a.first < b.first);}); + if (do_full) { + for (auto &kv : countmap) { + auto &name = eventnamemap[kv.first]; + auto rate = ratemap[kv.first]; + std::cout << "idx: " << kv.first << " name: " << name << " rate: " << rate << std::endl; + while (!kv.second.empty()) { + auto &val = kv.second.back(); + kv.second.pop_back(); + std::cout << val.second << ": " << val.first << std::endl; + } + } + return (0); + } + for (auto &kv : countmap) { + auto &name = eventnamemap[kv.first]; + auto rate = ratemap[kv.first]; + std::cout << name << ":" << std::endl; + for (auto i = 0; i < k; i++) { + auto largest = kv.second.back(); + kv.second.pop_back(); + std::cout << "\t" << largest.second << ": " << largest.first*rate << std::endl; + } + } + return (0); +} + +static struct option longopts[] = { + {"full", no_argument, NULL, 'f'}, + {"topk", required_argument, NULL, 'k'}, + {NULL, 0, NULL, 0} +}; + +int +cmd_pmc_summary(int argc, char **argv) +{ + int option, logfd, k; + bool do_full; + + do_full = false; + k = 5; + while ((option = getopt_long(argc, argv, "k:f", longopts, NULL)) != -1) { + switch (option) { + case 'f': + do_full = 1; + break; + case 'k': + k = atoi(optarg); + break; + case '?': + default: + usage(); + } + } + argc -= optind; + argv += optind; + if (argc != 1) { + printf("argc: %d\n", argc); + for (int i = 0; i < argc; i++) + printf("%s\n", argv[i]); + usage(); + } + if ((logfd = open(argv[0], O_RDONLY, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) + errx(EX_OSERR, "ERROR: Cannot open \"%s\" for reading: %s.", argv[0], + strerror(errno)); + + return (pmc_summary_handler(logfd, k, do_full)); +} diff --git a/usr.sbin/pmc/pmc.c b/usr.sbin/pmc/pmc.c index fb8268fc224a..296ed0bb0201 100644 --- a/usr.sbin/pmc/pmc.c +++ b/usr.sbin/pmc/pmc.c @@ -1,118 +1,119 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cmd_pmc.h" int pmc_displayheight = DEFAULT_DISPLAY_HEIGHT; int pmc_displaywidth = DEFAULT_DISPLAY_WIDTH; int pmc_kq; struct pmcstat_args pmc_args; struct pmcstat_pmcs pmcstat_pmcs = LIST_HEAD_INITIALIZER(pmcstat_pmcs); struct pmcstat_image_hash_list pmcstat_image_hash[PMCSTAT_NHASH]; struct pmcstat_process_hash_list pmcstat_process_hash[PMCSTAT_NHASH]; struct cmd_handler { const char *ch_name; cmd_disp_t ch_fn; }; static struct cmd_handler disp_table[] = { {"stat", cmd_pmc_stat}, {"stat-system", cmd_pmc_stat_system}, {"list-events", cmd_pmc_list_events}, {"filter", cmd_pmc_filter}, + {"summary", cmd_pmc_summary}, {NULL, NULL} }; static void usage(void) { errx(EX_USAGE, "\t pmc management utility\n" "\t stat run program and print stats\n" "\t stat-system run program and print system wide stats for duration of execution\n" "\t list-events list PMC events available on host\n" "\t filter filter records by lwp, pid, or event\n" ); } static cmd_disp_t disp_lookup(char *name) { struct cmd_handler *hnd; for (hnd = disp_table; hnd->ch_name != NULL; hnd++) if (strcmp(hnd->ch_name, name) == 0) return (hnd->ch_fn); return (NULL); } int main(int argc, char **argv) { cmd_disp_t disp; pmc_args.pa_printfile = stderr; STAILQ_INIT(&pmc_args.pa_events); SLIST_INIT(&pmc_args.pa_targets); if (argc == 1) usage(); if ((disp = disp_lookup(argv[1])) == NULL) usage(); argc--; argv++; /* Allocate a kqueue */ if ((pmc_kq = kqueue()) < 0) err(EX_OSERR, "ERROR: Cannot allocate kqueue"); if (pmc_init() < 0) err(EX_UNAVAILABLE, "ERROR: Initialization of the pmc(3) library failed" ); return (disp(argc, argv)); } diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c index f73de2b37385..838e8cac58b4 100644 --- a/usr.sbin/pmcstat/pmcstat.c +++ b/usr.sbin/pmcstat/pmcstat.c @@ -1,1460 +1,1461 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pmcstat.h" /* * A given invocation of pmcstat(8) can manage multiple PMCs of both * the system-wide and per-process variety. Each of these could be in * 'counting mode' or in 'sampling mode'. * * For 'counting mode' PMCs, pmcstat(8) will periodically issue a * pmc_read() at the configured time interval and print out the value * of the requested PMCs. * * For 'sampling mode' PMCs it can log to a file for offline analysis, * or can analyse sampling data "on the fly", either by converting * samples to printed textual form or by creating gprof(1) compatible * profiles, one per program executed. When creating gprof(1) * profiles it can optionally merge entries from multiple processes * for a given executable into a single profile file. * * pmcstat(8) can also execute a command line and attach PMCs to the * resulting child process. The protocol used is as follows: * * - parent creates a socketpair for two way communication and * fork()s. * - subsequently: * * /Parent/ /Child/ * * - Wait for childs token. * - Sends token. * - Awaits signal to start. * - Attaches PMCs to the child's pid * and starts them. Sets up * monitoring for the child. * - Signals child to start. * - Receives signal, attempts exec(). * * After this point normal processing can happen. */ /* Globals */ int pmcstat_displayheight = DEFAULT_DISPLAY_HEIGHT; int pmcstat_displaywidth = DEFAULT_DISPLAY_WIDTH; static int pmcstat_sockpair[NSOCKPAIRFD]; static int pmcstat_kq; static kvm_t *pmcstat_kvm; static struct kinfo_proc *pmcstat_plist; struct pmcstat_args args; static void pmcstat_get_cpumask(const char *cpuspec, cpuset_t *cpumask) { int cpu; const char *s; char *end; CPU_ZERO(cpumask); s = cpuspec; do { cpu = strtol(s, &end, 0); if (cpu < 0 || end == s) errx(EX_USAGE, "ERROR: Illegal CPU specification \"%s\".", cpuspec); CPU_SET(cpu, cpumask); s = end + strspn(end, ", \t"); } while (*s); assert(!CPU_EMPTY(cpumask)); } void pmcstat_cleanup(void) { struct pmcstat_ev *ev; /* release allocated PMCs. */ STAILQ_FOREACH(ev, &args.pa_events, ev_next) if (ev->ev_pmcid != PMC_ID_INVALID) { if (pmc_stop(ev->ev_pmcid) < 0) err(EX_OSERR, "ERROR: cannot stop pmc 0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); if (pmc_release(ev->ev_pmcid) < 0) err(EX_OSERR, "ERROR: cannot release pmc 0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); } /* de-configure the log file if present. */ if (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) (void) pmc_configure_logfile(-1); if (args.pa_logparser) { pmclog_close(args.pa_logparser); args.pa_logparser = NULL; } pmcstat_log_shutdown_logging(); } void pmcstat_find_targets(const char *spec) { int n, nproc, pid, rv; struct pmcstat_target *pt; char errbuf[_POSIX2_LINE_MAX], *end; static struct kinfo_proc *kp; regex_t reg; regmatch_t regmatch; /* First check if we've been given a process id. */ pid = strtol(spec, &end, 0); if (end != spec && pid >= 0) { if ((pt = malloc(sizeof(*pt))) == NULL) goto outofmemory; pt->pt_pid = pid; SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next); return; } /* Otherwise treat arg as a regular expression naming processes. */ if (pmcstat_kvm == NULL) { if ((pmcstat_kvm = kvm_openfiles(NULL, "/dev/null", NULL, 0, errbuf)) == NULL) err(EX_OSERR, "ERROR: Cannot open kernel \"%s\"", errbuf); if ((pmcstat_plist = kvm_getprocs(pmcstat_kvm, KERN_PROC_PROC, 0, &nproc)) == NULL) err(EX_OSERR, "ERROR: Cannot get process list: %s", kvm_geterr(pmcstat_kvm)); } else nproc = 0; if ((rv = regcomp(®, spec, REG_EXTENDED|REG_NOSUB)) != 0) { regerror(rv, ®, errbuf, sizeof(errbuf)); err(EX_DATAERR, "ERROR: Failed to compile regex \"%s\": %s", spec, errbuf); } for (n = 0, kp = pmcstat_plist; n < nproc; n++, kp++) { if ((rv = regexec(®, kp->ki_comm, 1, ®match, 0)) == 0) { if ((pt = malloc(sizeof(*pt))) == NULL) goto outofmemory; pt->pt_pid = kp->ki_pid; SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next); } else if (rv != REG_NOMATCH) { regerror(rv, ®, errbuf, sizeof(errbuf)); errx(EX_SOFTWARE, "ERROR: Regex evalation failed: %s", errbuf); } } regfree(®); return; outofmemory: errx(EX_SOFTWARE, "Out of memory."); /*NOTREACHED*/ } void pmcstat_kill_process(void) { struct pmcstat_target *pt; assert(args.pa_flags & FLAG_HAS_COMMANDLINE); /* * If a command line was specified, it would be the very first * in the list, before any other processes specified by -t. */ pt = SLIST_FIRST(&args.pa_targets); assert(pt != NULL); if (kill(pt->pt_pid, SIGINT) != 0) err(EX_OSERR, "ERROR: cannot signal child process"); } void pmcstat_start_pmcs(void) { struct pmcstat_ev *ev; STAILQ_FOREACH(ev, &args.pa_events, ev_next) { assert(ev->ev_pmcid != PMC_ID_INVALID); if (pmc_start(ev->ev_pmcid) < 0) { warn("ERROR: Cannot start pmc 0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); pmcstat_cleanup(); exit(EX_OSERR); } } } void pmcstat_print_headers(void) { struct pmcstat_ev *ev; int c, w; (void) fprintf(args.pa_printfile, PRINT_HEADER_PREFIX); STAILQ_FOREACH(ev, &args.pa_events, ev_next) { if (PMC_IS_SAMPLING_MODE(ev->ev_mode)) continue; c = PMC_IS_SYSTEM_MODE(ev->ev_mode) ? 's' : 'p'; if (ev->ev_fieldskip != 0) (void) fprintf(args.pa_printfile, "%*s", ev->ev_fieldskip, ""); w = ev->ev_fieldwidth - ev->ev_fieldskip - 2; if (c == 's') (void) fprintf(args.pa_printfile, "s/%02d/%-*s ", ev->ev_cpu, w-3, ev->ev_name); else (void) fprintf(args.pa_printfile, "p/%*s ", w, ev->ev_name); } (void) fflush(args.pa_printfile); } void pmcstat_print_counters(void) { int extra_width; struct pmcstat_ev *ev; pmc_value_t value; extra_width = sizeof(PRINT_HEADER_PREFIX) - 1; STAILQ_FOREACH(ev, &args.pa_events, ev_next) { /* skip sampling mode counters */ if (PMC_IS_SAMPLING_MODE(ev->ev_mode)) continue; if (pmc_read(ev->ev_pmcid, &value) < 0) err(EX_OSERR, "ERROR: Cannot read pmc \"%s\"", ev->ev_name); (void) fprintf(args.pa_printfile, "%*ju ", ev->ev_fieldwidth + extra_width, (uintmax_t) ev->ev_cumulative ? value : (value - ev->ev_saved)); if (ev->ev_cumulative == 0) ev->ev_saved = value; extra_width = 0; } (void) fflush(args.pa_printfile); } /* * Print output */ void pmcstat_print_pmcs(void) { static int linecount = 0; /* check if we need to print a header line */ if (++linecount > pmcstat_displayheight) { (void) fprintf(args.pa_printfile, "\n"); linecount = 1; } if (linecount == 1) pmcstat_print_headers(); (void) fprintf(args.pa_printfile, "\n"); pmcstat_print_counters(); return; } void pmcstat_show_usage(void) { errx(EX_USAGE, "[options] [commandline]\n" "\t Measure process and/or system performance using hardware\n" "\t performance monitoring counters.\n" "\t Options include:\n" "\t -C\t\t (toggle) show cumulative counts\n" "\t -D path\t create profiles in directory \"path\"\n" "\t -E\t\t (toggle) show counts at process exit\n" "\t -F file\t write a system-wide callgraph (Kcachegrind format)" " to \"file\"\n" "\t -G file\t write a system-wide callgraph to \"file\"\n" "\t -I\t\t don't resolve leaf function name, show address instead\n" "\t -L\t\t list all counters available on this host\n" "\t -M file\t print executable/gmon file map to \"file\"\n" "\t -N\t\t (toggle) capture callchains\n" "\t -O file\t send log output to \"file\"\n" "\t -P spec\t allocate a process-private sampling PMC\n" "\t -R file\t read events from \"file\"\n" "\t -S spec\t allocate a system-wide sampling PMC\n" "\t -T\t\t start in top mode\n" "\t -U \t\n merged user kernel stack capture\n" "\t -W\t\t (toggle) show counts per context switch\n" "\t -a file\t print sampled PCs and callgraph to \"file\"\n" "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n" "\t -d\t\t (toggle) track descendants\n" "\t -e\t\t use wide history counter for gprof(1) output\n" "\t -f spec\t pass \"spec\" to as plugin option\n" "\t -g\t\t produce gprof(1) compatible profiles\n" "\t -i lwp\t\t filter on thread id \"lwp\" in post-processing\n" "\t -k dir\t\t set the path to the kernel\n" "\t -l secs\t set duration time\n" "\t -m file\t print sampled PCs to \"file\"\n" "\t -n rate\t set sampling rate\n" "\t -o file\t send print output to \"file\"\n" "\t -p spec\t allocate a process-private counting PMC\n" "\t -q\t\t suppress verbosity\n" "\t -r fsroot\t specify FS root directory\n" "\t -s spec\t allocate a system-wide counting PMC\n" "\t -t process-spec attach to running processes matching " "\"process-spec\"\n" "\t -u spec \t provide short description of counters matching spec\n" "\t -v\t\t increase verbosity\n" "\t -w secs\t set printing time interval\n" "\t -z depth\t limit callchain display depth" ); } /* * At exit handler for top mode */ void pmcstat_topexit(void) { if (!args.pa_toptty) return; /* * Shutdown ncurses. */ clrtoeol(); refresh(); endwin(); } /* * Main */ int main(int argc, char **argv) { cpuset_t cpumask, rootmask; double interval; double duration; int option, npmc; int c, check_driver_stats; int do_callchain, do_descendants, do_logproccsw, do_logprocexit; int do_print, do_read, do_listcounters, do_descr; int do_userspace; size_t len; int graphdepth; int pipefd[2], rfd; int use_cumulative_counts; short cf, cb; uint64_t current_sampling_count; char *end, *tmp, *event; const char *errmsg, *graphfilename; enum pmcstat_state runstate; struct pmc_driverstats ds_start, ds_end; struct pmcstat_ev *ev; struct sigaction sa; struct kevent kev; struct winsize ws; struct stat sb; char buffer[PATH_MAX]; check_driver_stats = 0; current_sampling_count = 0; do_callchain = 1; do_descr = 0; do_descendants = 0; do_userspace = 0; do_logproccsw = 0; do_logprocexit = 0; do_listcounters = 0; use_cumulative_counts = 0; graphfilename = "-"; args.pa_required = 0; args.pa_flags = 0; args.pa_verbosity = 1; args.pa_logfd = -1; args.pa_fsroot = ""; args.pa_samplesdir = "."; args.pa_printfile = stderr; args.pa_graphdepth = DEFAULT_CALLGRAPH_DEPTH; args.pa_graphfile = NULL; args.pa_interval = DEFAULT_WAIT_INTERVAL; args.pa_mapfilename = NULL; args.pa_inputpath = NULL; args.pa_outputpath = NULL; args.pa_pplugin = PMCSTAT_PL_NONE; args.pa_plugin = PMCSTAT_PL_NONE; args.pa_ctdumpinstr = 1; args.pa_topmode = PMCSTAT_TOP_DELTA; args.pa_toptty = 0; args.pa_topcolor = 0; args.pa_mergepmc = 0; args.pa_duration = 0.0; STAILQ_INIT(&args.pa_events); SLIST_INIT(&args.pa_targets); bzero(&ds_start, sizeof(ds_start)); bzero(&ds_end, sizeof(ds_end)); ev = NULL; event = NULL; CPU_ZERO(&cpumask); /* Default to using the running system kernel. */ len = 0; if (sysctlbyname("kern.bootfile", NULL, &len, NULL, 0) == -1) err(EX_OSERR, "ERROR: Cannot determine path of running kernel"); args.pa_kernel = malloc(len); if (args.pa_kernel == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); if (sysctlbyname("kern.bootfile", args.pa_kernel, &len, NULL, 0) == -1) err(EX_OSERR, "ERROR: Cannot determine path of running kernel"); /* * The initial CPU mask specifies the root mask of this process * which is usually all CPUs in the system. */ if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, sizeof(rootmask), &rootmask) == -1) err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs"); CPU_COPY(&rootmask, &cpumask); while ((option = getopt(argc, argv, "CD:EF:G:ILM:NO:P:R:S:TUWZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) != -1) switch (option) { case 'a': /* Annotate + callgraph */ args.pa_flags |= FLAG_DO_ANNOTATE; args.pa_plugin = PMCSTAT_PL_ANNOTATE_CG; graphfilename = optarg; break; case 'C': /* cumulative values */ use_cumulative_counts = !use_cumulative_counts; args.pa_required |= FLAG_HAS_COUNTING_PMCS; break; case 'c': /* CPU */ if (optarg[0] == '*' && optarg[1] == '\0') CPU_COPY(&rootmask, &cpumask); else pmcstat_get_cpumask(optarg, &cpumask); args.pa_flags |= FLAGS_HAS_CPUMASK; args.pa_required |= FLAG_HAS_SYSTEM_PMCS; break; case 'D': if (stat(optarg, &sb) < 0) err(EX_OSERR, "ERROR: Cannot stat \"%s\"", optarg); if (!S_ISDIR(sb.st_mode)) errx(EX_USAGE, "ERROR: \"%s\" is not a directory.", optarg); args.pa_samplesdir = optarg; args.pa_flags |= FLAG_HAS_SAMPLESDIR; args.pa_required |= FLAG_DO_GPROF; break; case 'd': /* toggle descendents */ do_descendants = !do_descendants; args.pa_required |= FLAG_HAS_PROCESS_PMCS; break; case 'E': /* log process exit */ do_logprocexit = !do_logprocexit; args.pa_required |= (FLAG_HAS_PROCESS_PMCS | FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE); break; case 'e': /* wide gprof metrics */ args.pa_flags |= FLAG_DO_WIDE_GPROF_HC; break; case 'F': /* produce a system-wide calltree */ args.pa_flags |= FLAG_DO_CALLGRAPHS; args.pa_plugin = PMCSTAT_PL_CALLTREE; graphfilename = optarg; break; case 'f': /* plugins options */ if (args.pa_plugin == PMCSTAT_PL_NONE) err(EX_USAGE, "ERROR: Need -g/-G/-m/-T."); pmcstat_pluginconfigure_log(optarg); break; case 'G': /* produce a system-wide callgraph */ args.pa_flags |= FLAG_DO_CALLGRAPHS; args.pa_plugin = PMCSTAT_PL_CALLGRAPH; graphfilename = optarg; break; case 'g': /* produce gprof compatible profiles */ args.pa_flags |= FLAG_DO_GPROF; args.pa_pplugin = PMCSTAT_PL_CALLGRAPH; args.pa_plugin = PMCSTAT_PL_GPROF; break; case 'I': args.pa_flags |= FLAG_SKIP_TOP_FN_RES; break; case 'i': args.pa_flags |= FLAG_FILTER_THREAD_ID; args.pa_tid = strtol(optarg, &end, 0); break; case 'k': /* pathname to the kernel */ free(args.pa_kernel); args.pa_kernel = strdup(optarg); if (args.pa_kernel == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory"); args.pa_required |= FLAG_DO_ANALYSIS; args.pa_flags |= FLAG_HAS_KERNELPATH; break; case 'L': do_listcounters = 1; break; case 'l': /* time duration in seconds */ duration = strtod(optarg, &end); if (*end != '\0' || duration <= 0) errx(EX_USAGE, "ERROR: Illegal duration time " "value \"%s\".", optarg); args.pa_flags |= FLAG_HAS_DURATION; args.pa_duration = duration; break; case 'm': args.pa_flags |= FLAG_DO_ANNOTATE; args.pa_plugin = PMCSTAT_PL_ANNOTATE; graphfilename = optarg; break; case 'M': /* mapfile */ args.pa_mapfilename = optarg; break; case 'N': do_callchain = !do_callchain; args.pa_required |= FLAG_HAS_SAMPLING_PMCS; break; case 'p': /* process virtual counting PMC */ case 's': /* system-wide counting PMC */ case 'P': /* process virtual sampling PMC */ case 'S': /* system-wide sampling PMC */ if ((ev = malloc(sizeof(*ev))) == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); switch (option) { case 'p': ev->ev_mode = PMC_MODE_TC; break; case 's': ev->ev_mode = PMC_MODE_SC; break; case 'P': ev->ev_mode = PMC_MODE_TS; break; case 'S': ev->ev_mode = PMC_MODE_SS; break; } if (option == 'P' || option == 'p') { args.pa_flags |= FLAG_HAS_PROCESS_PMCS; args.pa_required |= (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET); } if (option == 'P' || option == 'S') { args.pa_flags |= FLAG_HAS_SAMPLING_PMCS; args.pa_required |= (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE); } if (option == 'p' || option == 's') args.pa_flags |= FLAG_HAS_COUNTING_PMCS; if (option == 's' || option == 'S') args.pa_flags |= FLAG_HAS_SYSTEM_PMCS; ev->ev_spec = strdup(optarg); if (ev->ev_spec == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); if (option == 'S' || option == 'P') ev->ev_count = current_sampling_count ? current_sampling_count : pmc_pmu_sample_rate_get(ev->ev_spec); else ev->ev_count = -1; if (option == 'S' || option == 's') ev->ev_cpu = CPU_FFS(&cpumask) - 1; else ev->ev_cpu = PMC_CPU_ANY; ev->ev_flags = 0; if (do_callchain) { ev->ev_flags |= PMC_F_CALLCHAIN; if (do_userspace) ev->ev_flags |= PMC_F_USERCALLCHAIN; } if (do_descendants) ev->ev_flags |= PMC_F_DESCENDANTS; if (do_logprocexit) ev->ev_flags |= PMC_F_LOG_PROCEXIT; if (do_logproccsw) ev->ev_flags |= PMC_F_LOG_PROCCSW; ev->ev_cumulative = use_cumulative_counts; ev->ev_saved = 0LL; ev->ev_pmcid = PMC_ID_INVALID; /* extract event name */ c = strcspn(optarg, ", \t"); ev->ev_name = malloc(c + 1); if (ev->ev_name == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); (void) strncpy(ev->ev_name, optarg, c); *(ev->ev_name + c) = '\0'; STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next); if (option == 's' || option == 'S') { CPU_CLR(ev->ev_cpu, &cpumask); pmcstat_clone_event_descriptor(ev, &cpumask, &args); CPU_SET(ev->ev_cpu, &cpumask); } break; case 'n': /* sampling count */ current_sampling_count = strtol(optarg, &end, 0); if (*end != '\0' || current_sampling_count <= 0) errx(EX_USAGE, "ERROR: Illegal count value \"%s\".", optarg); args.pa_required |= FLAG_HAS_SAMPLING_PMCS; break; case 'o': /* outputfile */ if (args.pa_printfile != NULL && args.pa_printfile != stdout && args.pa_printfile != stderr) (void) fclose(args.pa_printfile); if ((args.pa_printfile = fopen(optarg, "w")) == NULL) errx(EX_OSERR, "ERROR: cannot open \"%s\" for writing.", optarg); args.pa_flags |= FLAG_DO_PRINT; break; case 'O': /* sampling output */ if (args.pa_outputpath) errx(EX_USAGE, "ERROR: option -O may only be specified once."); args.pa_outputpath = optarg; args.pa_flags |= FLAG_HAS_OUTPUT_LOGFILE; break; case 'q': /* quiet mode */ args.pa_verbosity = 0; break; case 'r': /* root FS path */ args.pa_fsroot = optarg; break; case 'R': /* read an existing log file */ if (args.pa_inputpath != NULL) errx(EX_USAGE, "ERROR: option -R may only be specified once."); args.pa_inputpath = optarg; if (args.pa_printfile == stderr) args.pa_printfile = stdout; args.pa_flags |= FLAG_READ_LOGFILE; break; case 't': /* target pid or process name */ pmcstat_find_targets(optarg); args.pa_flags |= FLAG_HAS_TARGET; args.pa_required |= FLAG_HAS_PROCESS_PMCS; break; case 'T': /* top mode */ args.pa_flags |= FLAG_DO_TOP; args.pa_plugin = PMCSTAT_PL_CALLGRAPH; args.pa_ctdumpinstr = 0; args.pa_mergepmc = 1; if (args.pa_printfile == stderr) args.pa_printfile = stdout; break; case 'u': do_descr = 1; event = optarg; break; case 'U': /* toggle user-space callchain capture */ do_userspace = !do_userspace; args.pa_required |= FLAG_HAS_SAMPLING_PMCS; break; case 'v': /* verbose */ args.pa_verbosity++; break; case 'w': /* wait interval */ interval = strtod(optarg, &end); if (*end != '\0' || interval <= 0) errx(EX_USAGE, "ERROR: Illegal wait interval value \"%s\".", optarg); args.pa_flags |= FLAG_HAS_WAIT_INTERVAL; args.pa_interval = interval; break; case 'W': /* toggle LOG_CSW */ do_logproccsw = !do_logproccsw; args.pa_required |= (FLAG_HAS_PROCESS_PMCS | FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE); break; case 'z': graphdepth = strtod(optarg, &end); if (*end != '\0' || graphdepth <= 0) errx(EX_USAGE, "ERROR: Illegal callchain depth \"%s\".", optarg); args.pa_graphdepth = graphdepth; args.pa_required |= FLAG_DO_CALLGRAPHS; break; case '?': default: pmcstat_show_usage(); break; } if ((do_listcounters | do_descr) && pmc_pmu_enabled() == 0) errx(EX_USAGE, "pmu features not supported on host or hwpmc not loaded"); if (do_listcounters) { pmc_pmu_print_counters(NULL); } else if (do_descr) { pmc_pmu_print_counter_desc(event); } if (do_listcounters | do_descr) exit(0); args.pa_argc = (argc -= optind); args.pa_argv = (argv += optind); /* If we read from logfile and no specified CPU mask use * the maximum CPU count. */ if ((args.pa_flags & FLAG_READ_LOGFILE) && (args.pa_flags & FLAGS_HAS_CPUMASK) == 0) CPU_FILL(&cpumask); args.pa_cpumask = cpumask; /* For selecting CPUs using -R. */ if (argc) /* command line present */ args.pa_flags |= FLAG_HAS_COMMANDLINE; if (args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS | FLAG_DO_ANNOTATE | FLAG_DO_TOP)) args.pa_flags |= FLAG_DO_ANALYSIS; /* * Check invocation syntax. */ /* disallow -O and -R together */ if (args.pa_outputpath && args.pa_inputpath) errx(EX_USAGE, "ERROR: options -O and -R are mutually exclusive."); /* disallow -T and -l together */ if ((args.pa_flags & FLAG_HAS_DURATION) && (args.pa_flags & FLAG_DO_TOP)) errx(EX_USAGE, "ERROR: options -T and -l are mutually " "exclusive."); /* -a and -m require -R */ if (args.pa_flags & FLAG_DO_ANNOTATE && args.pa_inputpath == NULL) errx(EX_USAGE, "ERROR: option %s requires an input file", args.pa_plugin == PMCSTAT_PL_ANNOTATE ? "-m" : "-a"); /* -m option is not allowed combined with -g or -G. */ if (args.pa_flags & FLAG_DO_ANNOTATE && args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS)) errx(EX_USAGE, "ERROR: option -m and -g | -G are mutually exclusive"); if (args.pa_flags & FLAG_READ_LOGFILE) { errmsg = NULL; if (args.pa_flags & FLAG_HAS_COMMANDLINE) errmsg = "a command line specification"; else if (args.pa_flags & FLAG_HAS_TARGET) errmsg = "option -t"; else if (!STAILQ_EMPTY(&args.pa_events)) errmsg = "a PMC event specification"; if (errmsg) errx(EX_USAGE, "ERROR: option -R may not be used with %s.", errmsg); } else if (STAILQ_EMPTY(&args.pa_events)) /* All other uses require a PMC spec. */ pmcstat_show_usage(); /* check for -t pid without a process PMC spec */ if ((args.pa_required & FLAG_HAS_TARGET) && (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0) errx(EX_USAGE, "ERROR: option -t requires a process mode PMC to be specified." ); /* check for process-mode options without a command or -t pid */ if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) && (args.pa_flags & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) == 0) errx(EX_USAGE, "ERROR: options -d, -E, -p, -P, and -W require a command line or target process." ); /* check for -p | -P without a target process of some sort */ if ((args.pa_required & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) && (args.pa_flags & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) == 0) errx(EX_USAGE, "ERROR: options -P and -p require a target process or a command line." ); /* check for process-mode options without a process-mode PMC */ if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) && (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0) errx(EX_USAGE, "ERROR: options -d, -E, and -W require a process mode PMC to be specified." ); /* check for -c cpu with no system mode PMCs or logfile. */ if ((args.pa_required & FLAG_HAS_SYSTEM_PMCS) && (args.pa_flags & FLAG_HAS_SYSTEM_PMCS) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -c requires at least one system mode PMC to be specified." ); /* check for counting mode options without a counting PMC */ if ((args.pa_required & FLAG_HAS_COUNTING_PMCS) && (args.pa_flags & FLAG_HAS_COUNTING_PMCS) == 0) errx(EX_USAGE, "ERROR: options -C, -W and -o require at least one counting mode PMC to be specified." ); /* check for sampling mode options without a sampling PMC spec */ if ((args.pa_required & FLAG_HAS_SAMPLING_PMCS) && (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) == 0) errx(EX_USAGE, "ERROR: options -N, -n and -O require at least one sampling mode PMC to be specified." ); /* check if -g/-G/-m/-T are being used correctly */ if ((args.pa_flags & FLAG_DO_ANALYSIS) && !(args.pa_flags & (FLAG_HAS_SAMPLING_PMCS|FLAG_READ_LOGFILE))) errx(EX_USAGE, "ERROR: options -g/-G/-m/-T require sampling PMCs or -R to be specified." ); /* check if -e was specified without -g */ if ((args.pa_flags & FLAG_DO_WIDE_GPROF_HC) && !(args.pa_flags & FLAG_DO_GPROF)) errx(EX_USAGE, "ERROR: option -e requires gprof mode to be specified." ); /* check if -O was spuriously specified */ if ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) && (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -O is used only with options -E, -P, -S and -W." ); /* -k kernel path require -g/-G/-m/-T or -R */ if ((args.pa_flags & FLAG_HAS_KERNELPATH) && (args.pa_flags & FLAG_DO_ANALYSIS) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -k is only used with -g/-R/-m/-T."); /* -D only applies to gprof output mode (-g) */ if ((args.pa_flags & FLAG_HAS_SAMPLESDIR) && (args.pa_flags & FLAG_DO_GPROF) == 0) errx(EX_USAGE, "ERROR: option -D is only used with -g."); /* -M mapfile requires -g or -R */ if (args.pa_mapfilename != NULL && (args.pa_flags & FLAG_DO_GPROF) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -M is only used with -g/-R."); /* * Disallow textual output of sampling PMCs if counting PMCs * have also been asked for, mostly because the combined output * is difficult to make sense of. */ if ((args.pa_flags & FLAG_HAS_COUNTING_PMCS) && (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) && ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) == 0)) errx(EX_USAGE, "ERROR: option -O is required if counting and sampling PMCs are specified together." ); /* * Check if 'kerneldir' refers to a file rather than a * directory. If so, use `dirname path` to determine the * kernel directory. */ (void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot, args.pa_kernel); if (stat(buffer, &sb) < 0) err(EX_OSERR, "ERROR: Cannot locate kernel \"%s\"", buffer); if (!S_ISREG(sb.st_mode) && !S_ISDIR(sb.st_mode)) errx(EX_USAGE, "ERROR: \"%s\": Unsupported file type.", buffer); if (!S_ISDIR(sb.st_mode)) { tmp = args.pa_kernel; args.pa_kernel = strdup(dirname(args.pa_kernel)); if (args.pa_kernel == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory"); free(tmp); (void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot, args.pa_kernel); if (stat(buffer, &sb) < 0) err(EX_OSERR, "ERROR: Cannot stat \"%s\"", buffer); if (!S_ISDIR(sb.st_mode)) errx(EX_USAGE, "ERROR: \"%s\" is not a directory.", buffer); } /* * If we have a callgraph be created, select the outputfile. */ if (args.pa_flags & FLAG_DO_CALLGRAPHS) { if (strcmp(graphfilename, "-") == 0) args.pa_graphfile = args.pa_printfile; else { args.pa_graphfile = fopen(graphfilename, "w"); if (args.pa_graphfile == NULL) err(EX_OSERR, "ERROR: cannot open \"%s\" for writing", graphfilename); } } if (args.pa_flags & FLAG_DO_ANNOTATE) { args.pa_graphfile = fopen(graphfilename, "w"); if (args.pa_graphfile == NULL) err(EX_OSERR, "ERROR: cannot open \"%s\" for writing", graphfilename); } /* if we've been asked to process a log file, skip init */ if ((args.pa_flags & FLAG_READ_LOGFILE) == 0) { if (pmc_init() < 0) err(EX_UNAVAILABLE, "ERROR: Initialization of the pmc(3) library failed" ); if ((npmc = pmc_npmc(0)) < 0) /* assume all CPUs are identical */ err(EX_OSERR, "ERROR: Cannot determine the number of PMCs on CPU %d", 0); } /* Allocate a kqueue */ if ((pmcstat_kq = kqueue()) < 0) err(EX_OSERR, "ERROR: Cannot allocate kqueue"); /* Setup the logfile as the source. */ if (args.pa_flags & FLAG_READ_LOGFILE) { /* * Print the log in textual form if we haven't been * asked to generate profiling information. */ if ((args.pa_flags & FLAG_DO_ANALYSIS) == 0) args.pa_flags |= FLAG_DO_PRINT; pmcstat_log_initialize_logging(); rfd = pmcstat_open_log(args.pa_inputpath, PMCSTAT_OPEN_FOR_READ); if ((args.pa_logparser = pmclog_open(rfd)) == NULL) err(EX_OSERR, "ERROR: Cannot create parser"); if (fcntl(rfd, F_SETFL, O_NONBLOCK) < 0) err(EX_OSERR, "ERROR: fcntl(2) failed"); EV_SET(&kev, rfd, EVFILT_READ, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent"); } /* * Configure the specified log file or setup a default log * consumer via a pipe. */ if (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) { if (args.pa_outputpath) args.pa_logfd = pmcstat_open_log(args.pa_outputpath, PMCSTAT_OPEN_FOR_WRITE); else { /* * process the log on the fly by reading it in * through a pipe. */ if (pipe(pipefd) < 0) err(EX_OSERR, "ERROR: pipe(2) failed"); if (fcntl(pipefd[READPIPEFD], F_SETFL, O_NONBLOCK) < 0) err(EX_OSERR, "ERROR: fcntl(2) failed"); EV_SET(&kev, pipefd[READPIPEFD], EVFILT_READ, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent"); args.pa_logfd = pipefd[WRITEPIPEFD]; args.pa_flags |= FLAG_HAS_PIPE; if ((args.pa_flags & FLAG_DO_TOP) == 0) args.pa_flags |= FLAG_DO_PRINT; args.pa_logparser = pmclog_open(pipefd[READPIPEFD]); } if (pmc_configure_logfile(args.pa_logfd) < 0) err(EX_OSERR, "ERROR: Cannot configure log file"); } /* remember to check for driver errors if we are sampling or logging */ check_driver_stats = (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) || (args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE); /* if (args.pa_flags & FLAG_READ_LOGFILE) { * Allocate PMCs. */ STAILQ_FOREACH(ev, &args.pa_events, ev_next) { if (pmc_allocate(ev->ev_spec, ev->ev_mode, - ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0) + ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid, + ev->ev_count) < 0) err(EX_OSERR, "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"", PMC_IS_SYSTEM_MODE(ev->ev_mode) ? "system" : "process", ev->ev_spec); if (PMC_IS_SAMPLING_MODE(ev->ev_mode) && pmc_set(ev->ev_pmcid, ev->ev_count) < 0) err(EX_OSERR, "ERROR: Cannot set sampling count for PMC \"%s\"", ev->ev_name); } /* compute printout widths */ STAILQ_FOREACH(ev, &args.pa_events, ev_next) { int counter_width; int display_width; int header_width; (void) pmc_width(ev->ev_pmcid, &counter_width); header_width = strlen(ev->ev_name) + 2; /* prefix '%c/' */ display_width = (int) floor(counter_width / 3.32193) + 1; if (PMC_IS_SYSTEM_MODE(ev->ev_mode)) header_width += 3; /* 2 digit CPU number + '/' */ if (header_width > display_width) { ev->ev_fieldskip = 0; ev->ev_fieldwidth = header_width; } else { ev->ev_fieldskip = display_width - header_width; ev->ev_fieldwidth = display_width; } } /* * If our output is being set to a terminal, register a handler * for window size changes. */ if (isatty(fileno(args.pa_printfile))) { if (ioctl(fileno(args.pa_printfile), TIOCGWINSZ, &ws) < 0) err(EX_OSERR, "ERROR: Cannot determine window size"); pmcstat_displayheight = ws.ws_row - 1; pmcstat_displaywidth = ws.ws_col - 1; EV_SET(&kev, SIGWINCH, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGWINCH"); args.pa_toptty = 1; } /* * Listen to key input in top mode. */ if (args.pa_flags & FLAG_DO_TOP) { EV_SET(&kev, fileno(stdin), EVFILT_READ, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent"); } EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT"); EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO"); /* * An exec() failure of a forked child is signalled by the * child sending the parent a SIGCHLD. We don't register an * actual signal handler for SIGCHLD, but instead use our * kqueue to pick up the signal. */ EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD"); /* * Setup a timer if we have counting mode PMCs needing to be printed or * top mode plugin is active. */ if (((args.pa_flags & FLAG_HAS_COUNTING_PMCS) && (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) || (args.pa_flags & FLAG_DO_TOP)) { EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, args.pa_interval * 1000, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for timer"); } /* * Setup a duration timer if we have sampling mode PMCs and * a duration time is set */ if ((args.pa_flags & FLAG_HAS_SAMPLING_PMCS) && (args.pa_flags & FLAG_HAS_DURATION)) { EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, args.pa_duration * 1000, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for " "time duration"); } /* attach PMCs to the target process, starting it if specified */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_create_process(pmcstat_sockpair, &args, pmcstat_kq); if (check_driver_stats && pmc_get_driver_stats(&ds_start) < 0) err(EX_OSERR, "ERROR: Cannot retrieve driver statistics"); /* Attach process pmcs to the target process. */ if (args.pa_flags & (FLAG_HAS_TARGET | FLAG_HAS_COMMANDLINE)) { if (SLIST_EMPTY(&args.pa_targets)) errx(EX_DATAERR, "ERROR: No matching target processes."); if (args.pa_flags & FLAG_HAS_PROCESS_PMCS) pmcstat_attach_pmcs(&args); if (pmcstat_kvm) { kvm_close(pmcstat_kvm); pmcstat_kvm = NULL; } } /* start the pmcs */ pmcstat_start_pmcs(); /* start the (commandline) process if needed */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_start_process(pmcstat_sockpair); /* initialize logging */ pmcstat_log_initialize_logging(); /* Handle SIGINT using the kqueue loop */ sa.sa_handler = SIG_IGN; sa.sa_flags = 0; (void) sigemptyset(&sa.sa_mask); if (sigaction(SIGINT, &sa, NULL) < 0) err(EX_OSERR, "ERROR: Cannot install signal handler"); /* * Setup the top mode display. */ if (args.pa_flags & FLAG_DO_TOP) { args.pa_flags &= ~FLAG_DO_PRINT; if (args.pa_toptty) { /* * Init ncurses. */ initscr(); if(has_colors() == TRUE) { args.pa_topcolor = 1; start_color(); use_default_colors(); pair_content(0, &cf, &cb); init_pair(1, COLOR_RED, cb); init_pair(2, COLOR_YELLOW, cb); init_pair(3, COLOR_GREEN, cb); } cbreak(); noecho(); nonl(); nodelay(stdscr, 1); intrflush(stdscr, FALSE); keypad(stdscr, TRUE); clear(); /* Get terminal width / height with ncurses. */ getmaxyx(stdscr, pmcstat_displayheight, pmcstat_displaywidth); pmcstat_displayheight--; pmcstat_displaywidth--; atexit(pmcstat_topexit); } } /* * loop till either the target process (if any) exits, or we * are killed by a SIGINT or we reached the time duration. */ runstate = PMCSTAT_RUNNING; do_print = do_read = 0; do { if ((c = kevent(pmcstat_kq, NULL, 0, &kev, 1, NULL)) <= 0) { if (errno != EINTR) err(EX_OSERR, "ERROR: kevent failed"); else continue; } if (kev.flags & EV_ERROR) errc(EX_OSERR, kev.data, "ERROR: kevent failed"); switch (kev.filter) { case EVFILT_PROC: /* target has exited */ runstate = pmcstat_close_log(&args); do_print = 1; break; case EVFILT_READ: /* log file data is present */ if (kev.ident == (unsigned)fileno(stdin) && (args.pa_flags & FLAG_DO_TOP)) { if (pmcstat_keypress_log()) runstate = pmcstat_close_log(&args); } else { do_read = 0; runstate = pmcstat_process_log(); } break; case EVFILT_SIGNAL: if (kev.ident == SIGCHLD) { /* * The child process sends us a * SIGCHLD if its exec() failed. We * wait for it to exit and then exit * ourselves. */ (void) wait(&c); runstate = PMCSTAT_FINISHED; } else if (kev.ident == SIGIO) { /* * We get a SIGIO if a PMC loses all * of its targets, or if logfile * writes encounter an error. */ runstate = pmcstat_close_log(&args); do_print = 1; /* print PMCs at exit */ } else if (kev.ident == SIGINT) { /* Kill the child process if we started it */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_kill_process(); runstate = pmcstat_close_log(&args); } else if (kev.ident == SIGWINCH) { if (ioctl(fileno(args.pa_printfile), TIOCGWINSZ, &ws) < 0) err(EX_OSERR, "ERROR: Cannot determine window size"); pmcstat_displayheight = ws.ws_row - 1; pmcstat_displaywidth = ws.ws_col - 1; } else assert(0); break; case EVFILT_TIMER: /* time duration reached, exit */ if (args.pa_flags & FLAG_HAS_DURATION) { runstate = PMCSTAT_FINISHED; break; } /* print out counting PMCs */ if ((args.pa_flags & FLAG_DO_TOP) && (args.pa_flags & FLAG_HAS_PIPE) && pmc_flush_logfile() == 0) do_read = 1; do_print = 1; break; } if (do_print && !do_read) { if ((args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) { pmcstat_print_pmcs(); if (runstate == PMCSTAT_FINISHED && /* final newline */ (args.pa_flags & FLAG_DO_PRINT) == 0) (void) fprintf(args.pa_printfile, "\n"); } if (args.pa_flags & FLAG_DO_TOP) pmcstat_display_log(); do_print = 0; } } while (runstate != PMCSTAT_FINISHED); if ((args.pa_flags & FLAG_DO_TOP) && args.pa_toptty) { pmcstat_topexit(); args.pa_toptty = 0; } /* flush any pending log entries */ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) pmc_close_logfile(); pmcstat_cleanup(); /* check if the driver lost any samples or events */ if (check_driver_stats) { if (pmc_get_driver_stats(&ds_end) < 0) err(EX_OSERR, "ERROR: Cannot retrieve driver statistics"); if (ds_start.pm_intr_bufferfull != ds_end.pm_intr_bufferfull && args.pa_verbosity > 0) warnx( "WARNING: sampling was paused at least %u time%s.\n" "Please consider tuning the \"kern.hwpmc.nsamples\" tunable.", ds_end.pm_intr_bufferfull - ds_start.pm_intr_bufferfull, ((ds_end.pm_intr_bufferfull - ds_start.pm_intr_bufferfull) != 1) ? "s" : "" ); if (ds_start.pm_buffer_requests_failed != ds_end.pm_buffer_requests_failed && args.pa_verbosity > 0) warnx( "WARNING: at least %u event%s were discarded while running.\n" "Please consider tuning the \"kern.hwpmc.nbuffers\" tunable.", ds_end.pm_buffer_requests_failed - ds_start.pm_buffer_requests_failed, ((ds_end.pm_buffer_requests_failed - ds_start.pm_buffer_requests_failed) != 1) ? "s" : "" ); } exit(EX_OK); }