diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index a7ed1c3d9ac8..f10a64e28cac 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -1,1951 +1,1944 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2003-2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include "libpmcinternal.h" /* Function prototypes */ #if defined(__amd64__) || defined(__i386__) static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__amd64__) || defined(__i386__) static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__arm__) static int armv7_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__aarch64__) static int arm64_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); static int cmn600_pmu_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); static int dmc620_pmu_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif static int soft_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #if defined(__powerpc__) static int powerpc_allocate_pmc(enum pmc_event _pe, char* ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif /* __powerpc__ */ #define PMC_CALL(op, params) syscall(pmc_syscall, (op), (params)) /* * Event aliases provide a way for the user to ask for generic events * like "cache-misses", or "instructions-retired". These aliases are * mapped to the appropriate canonical event descriptions using a * lookup table. */ struct pmc_event_alias { const char *pm_alias; const char *pm_spec; }; static const struct pmc_event_alias *pmc_mdep_event_aliases; /* * The pmc_event_descr structure maps symbolic names known to the user * to integer codes used by the PMC KLD. */ struct pmc_event_descr { const char *pm_ev_name; enum pmc_event pm_ev_code; }; /* * The pmc_class_descr structure maps class name prefixes for * event names to event tables and other PMC class data. */ struct pmc_class_descr { const char *pm_evc_name; size_t pm_evc_name_size; enum pmc_class pm_evc_class; const struct pmc_event_descr *pm_evc_event_table; size_t pm_evc_event_table_size; int (*pm_evc_allocate_pmc)(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pa); }; #define PMC_TABLE_SIZE(N) (sizeof(N)/sizeof(N[0])) #define PMC_EVENT_TABLE_SIZE(N) PMC_TABLE_SIZE(N##_event_table) #undef __PMC_EV #define __PMC_EV(C,N) { #N, PMC_EV_ ## C ## _ ## N }, /* * PMC_CLASSDEP_TABLE(NAME, CLASS) * * Define a table mapping event names and aliases to HWPMC event IDs. */ #define PMC_CLASSDEP_TABLE(N, C) \ static const struct pmc_event_descr N##_event_table[] = \ { \ __PMC_EV_##C() \ } PMC_CLASSDEP_TABLE(iaf, IAF); PMC_CLASSDEP_TABLE(k8, K8); PMC_CLASSDEP_TABLE(armv7, ARMV7); PMC_CLASSDEP_TABLE(armv8, ARMV8); PMC_CLASSDEP_TABLE(cmn600_pmu, CMN600_PMU); PMC_CLASSDEP_TABLE(dmc620_pmu_cd2, DMC620_PMU_CD2); PMC_CLASSDEP_TABLE(dmc620_pmu_c, DMC620_PMU_C); PMC_CLASSDEP_TABLE(ppc7450, PPC7450); PMC_CLASSDEP_TABLE(ppc970, PPC970); PMC_CLASSDEP_TABLE(e500, E500); static struct pmc_event_descr soft_event_table[PMC_EV_DYN_COUNT]; #undef __PMC_EV_ALIAS #define __PMC_EV_ALIAS(N,CODE) { N, PMC_EV_##CODE }, /* * TODO: Factor out the __PMC_EV_ARMV7/8 list into a single separate table * rather than duplicating for each core. */ static const struct pmc_event_descr cortex_a8_event_table[] = { __PMC_EV_ALIAS_ARMV7_CORTEX_A8() __PMC_EV_ARMV7() }; static const struct pmc_event_descr cortex_a9_event_table[] = { __PMC_EV_ALIAS_ARMV7_CORTEX_A9() __PMC_EV_ARMV7() }; static const struct pmc_event_descr cortex_a53_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A53() __PMC_EV_ARMV8() }; static const struct pmc_event_descr cortex_a57_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A57() __PMC_EV_ARMV8() }; static const struct pmc_event_descr cortex_a76_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A76() __PMC_EV_ARMV8() }; static const struct pmc_event_descr tsc_event_table[] = { __PMC_EV_ALIAS_TSC() }; #undef PMC_CLASS_TABLE_DESC #define PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR) \ static const struct pmc_class_descr NAME##_class_table_descr = \ { \ .pm_evc_name = #CLASS "-", \ .pm_evc_name_size = sizeof(#CLASS "-") - 1, \ .pm_evc_class = PMC_CLASS_##CLASS , \ .pm_evc_event_table = EVENTS##_event_table , \ .pm_evc_event_table_size = \ PMC_EVENT_TABLE_SIZE(EVENTS), \ .pm_evc_allocate_pmc = ALLOCATOR##_allocate_pmc \ } #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(k8, K8, k8, k8); #endif #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc); #endif #if defined(__arm__) PMC_CLASS_TABLE_DESC(cortex_a8, ARMV7, cortex_a8, armv7); PMC_CLASS_TABLE_DESC(cortex_a9, ARMV7, cortex_a9, armv7); #endif #if defined(__aarch64__) PMC_CLASS_TABLE_DESC(cortex_a53, ARMV8, cortex_a53, arm64); PMC_CLASS_TABLE_DESC(cortex_a57, ARMV8, cortex_a57, arm64); PMC_CLASS_TABLE_DESC(cortex_a76, ARMV8, cortex_a76, arm64); PMC_CLASS_TABLE_DESC(cmn600_pmu, CMN600_PMU, cmn600_pmu, cmn600_pmu); PMC_CLASS_TABLE_DESC(dmc620_pmu_cd2, DMC620_PMU_CD2, dmc620_pmu_cd2, dmc620_pmu); PMC_CLASS_TABLE_DESC(dmc620_pmu_c, DMC620_PMU_C, dmc620_pmu_c, dmc620_pmu); #endif #if defined(__powerpc__) PMC_CLASS_TABLE_DESC(ppc7450, PPC7450, ppc7450, powerpc); PMC_CLASS_TABLE_DESC(ppc970, PPC970, ppc970, powerpc); PMC_CLASS_TABLE_DESC(e500, E500, e500, powerpc); #endif static struct pmc_class_descr soft_class_table_descr = { .pm_evc_name = "SOFT-", .pm_evc_name_size = sizeof("SOFT-") - 1, .pm_evc_class = PMC_CLASS_SOFT, .pm_evc_event_table = NULL, .pm_evc_event_table_size = 0, .pm_evc_allocate_pmc = soft_allocate_pmc }; #undef PMC_CLASS_TABLE_DESC static const struct pmc_class_descr **pmc_class_table; #define PMC_CLASS_TABLE_SIZE cpu_info.pm_nclass /* * Mapping tables, mapping enumeration values to human readable * strings. */ static const char * pmc_capability_names[] = { #undef __PMC_CAP #define __PMC_CAP(N,V,D) #N , __PMC_CAPS() }; struct pmc_class_map { enum pmc_class pm_class; const char *pm_name; }; static const struct pmc_class_map pmc_class_names[] = { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) { .pm_class = PMC_CLASS_##S, .pm_name = #S } , __PMC_CLASSES() }; struct pmc_cputype_map { enum pmc_cputype pm_cputype; const char *pm_name; }; static const struct pmc_cputype_map pmc_cputype_names[] = { #undef __PMC_CPU #define __PMC_CPU(S, V, D) { .pm_cputype = PMC_CPU_##S, .pm_name = #S } , __PMC_CPUS() }; static const char * pmc_disposition_names[] = { #undef __PMC_DISP #define __PMC_DISP(D) #D , __PMC_DISPOSITIONS() }; static const char * pmc_mode_names[] = { #undef __PMC_MODE #define __PMC_MODE(M,N) #M , __PMC_MODES() }; static const char * pmc_state_names[] = { #undef __PMC_STATE #define __PMC_STATE(S) #S , __PMC_STATES() }; /* * Filled in by pmc_init(). */ static int pmc_syscall = -1; static struct pmc_cpuinfo cpu_info; static struct pmc_op_getdyneventinfo soft_event_info; /* Event masks for events */ struct pmc_masks { const char *pm_name; const uint64_t pm_value; }; #define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) } #define NULLMASK { .pm_name = NULL } #if defined(__amd64__) || defined(__i386__) static int pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint64_t *evmask) { const struct pmc_masks *pm; char *q, *r; int c; if (pmask == NULL) /* no mask keywords */ return (-1); q = strchr(p, '='); /* skip '=' */ if (*++q == '\0') /* no more data */ return (-1); c = 0; /* count of mask keywords seen */ while ((r = strsep(&q, "+")) != NULL) { for (pm = pmask; pm->pm_name && strcasecmp(r, pm->pm_name); pm++) ; if (pm->pm_name == NULL) /* not found */ return (-1); *evmask |= pm->pm_value; c++; } return (c); } #endif #define KWMATCH(p,kw) (strcasecmp((p), (kw)) == 0) #define KWPREFIXMATCH(p,kw) (strncasecmp((p), (kw), sizeof((kw)) - 1) == 0) #define EV_ALIAS(N,S) { .pm_alias = N, .pm_spec = S } #if defined(__amd64__) || defined(__i386__) /* * AMD K8 PMCs. * */ static struct pmc_event_alias k8_aliases[] = { EV_ALIAS("branches", "k8-fr-retired-taken-branches"), EV_ALIAS("branch-mispredicts", "k8-fr-retired-taken-branches-mispredicted"), EV_ALIAS("cycles", "tsc"), EV_ALIAS("dc-misses", "k8-dc-miss"), EV_ALIAS("ic-misses", "k8-ic-miss"), EV_ALIAS("instructions", "k8-fr-retired-x86-instructions"), EV_ALIAS("interrupts", "k8-fr-taken-hardware-interrupts"), EV_ALIAS("unhalted-cycles", "k8-bu-cpu-clk-unhalted"), EV_ALIAS(NULL, NULL) }; #define __K8MASK(N,V) PMCMASK(N,(1 << (V))) /* * Parsing tables */ /* fp dispatched fpu ops */ static const struct pmc_masks k8_mask_fdfo[] = { __K8MASK(add-pipe-excluding-junk-ops, 0), __K8MASK(multiply-pipe-excluding-junk-ops, 1), __K8MASK(store-pipe-excluding-junk-ops, 2), __K8MASK(add-pipe-junk-ops, 3), __K8MASK(multiply-pipe-junk-ops, 4), __K8MASK(store-pipe-junk-ops, 5), NULLMASK }; /* ls segment register loads */ static const struct pmc_masks k8_mask_lsrl[] = { __K8MASK(es, 0), __K8MASK(cs, 1), __K8MASK(ss, 2), __K8MASK(ds, 3), __K8MASK(fs, 4), __K8MASK(gs, 5), __K8MASK(hs, 6), NULLMASK }; /* ls locked operation */ static const struct pmc_masks k8_mask_llo[] = { __K8MASK(locked-instructions, 0), __K8MASK(cycles-in-request, 1), __K8MASK(cycles-to-complete, 2), NULLMASK }; /* dc refill from {l2,system} and dc copyback */ static const struct pmc_masks k8_mask_dc[] = { __K8MASK(invalid, 0), __K8MASK(shared, 1), __K8MASK(exclusive, 2), __K8MASK(owner, 3), __K8MASK(modified, 4), NULLMASK }; /* dc one bit ecc error */ static const struct pmc_masks k8_mask_dobee[] = { __K8MASK(scrubber, 0), __K8MASK(piggyback, 1), NULLMASK }; /* dc dispatched prefetch instructions */ static const struct pmc_masks k8_mask_ddpi[] = { __K8MASK(load, 0), __K8MASK(store, 1), __K8MASK(nta, 2), NULLMASK }; /* dc dcache accesses by locks */ static const struct pmc_masks k8_mask_dabl[] = { __K8MASK(accesses, 0), __K8MASK(misses, 1), NULLMASK }; /* bu internal l2 request */ static const struct pmc_masks k8_mask_bilr[] = { __K8MASK(ic-fill, 0), __K8MASK(dc-fill, 1), __K8MASK(tlb-reload, 2), __K8MASK(tag-snoop, 3), __K8MASK(cancelled, 4), NULLMASK }; /* bu fill request l2 miss */ static const struct pmc_masks k8_mask_bfrlm[] = { __K8MASK(ic-fill, 0), __K8MASK(dc-fill, 1), __K8MASK(tlb-reload, 2), NULLMASK }; /* bu fill into l2 */ static const struct pmc_masks k8_mask_bfil[] = { __K8MASK(dirty-l2-victim, 0), __K8MASK(victim-from-l2, 1), NULLMASK }; /* fr retired fpu instructions */ static const struct pmc_masks k8_mask_frfi[] = { __K8MASK(x87, 0), __K8MASK(mmx-3dnow, 1), __K8MASK(packed-sse-sse2, 2), __K8MASK(scalar-sse-sse2, 3), NULLMASK }; /* fr retired fastpath double op instructions */ static const struct pmc_masks k8_mask_frfdoi[] = { __K8MASK(low-op-pos-0, 0), __K8MASK(low-op-pos-1, 1), __K8MASK(low-op-pos-2, 2), NULLMASK }; /* fr fpu exceptions */ static const struct pmc_masks k8_mask_ffe[] = { __K8MASK(x87-reclass-microfaults, 0), __K8MASK(sse-retype-microfaults, 1), __K8MASK(sse-reclass-microfaults, 2), __K8MASK(sse-and-x87-microtraps, 3), NULLMASK }; /* nb memory controller page access event */ static const struct pmc_masks k8_mask_nmcpae[] = { __K8MASK(page-hit, 0), __K8MASK(page-miss, 1), __K8MASK(page-conflict, 2), NULLMASK }; /* nb memory controller turnaround */ static const struct pmc_masks k8_mask_nmct[] = { __K8MASK(dimm-turnaround, 0), __K8MASK(read-to-write-turnaround, 1), __K8MASK(write-to-read-turnaround, 2), NULLMASK }; /* nb memory controller bypass saturation */ static const struct pmc_masks k8_mask_nmcbs[] = { __K8MASK(memory-controller-hi-pri-bypass, 0), __K8MASK(memory-controller-lo-pri-bypass, 1), __K8MASK(dram-controller-interface-bypass, 2), __K8MASK(dram-controller-queue-bypass, 3), NULLMASK }; /* nb sized commands */ static const struct pmc_masks k8_mask_nsc[] = { __K8MASK(nonpostwrszbyte, 0), __K8MASK(nonpostwrszdword, 1), __K8MASK(postwrszbyte, 2), __K8MASK(postwrszdword, 3), __K8MASK(rdszbyte, 4), __K8MASK(rdszdword, 5), __K8MASK(rdmodwr, 6), NULLMASK }; /* nb probe result */ static const struct pmc_masks k8_mask_npr[] = { __K8MASK(probe-miss, 0), __K8MASK(probe-hit, 1), __K8MASK(probe-hit-dirty-no-memory-cancel, 2), __K8MASK(probe-hit-dirty-with-memory-cancel, 3), NULLMASK }; /* nb hypertransport bus bandwidth */ static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */ __K8MASK(command, 0), __K8MASK(data, 1), __K8MASK(buffer-release, 2), __K8MASK(nop, 3), NULLMASK }; #undef __K8MASK #define K8_KW_COUNT "count" #define K8_KW_EDGE "edge" #define K8_KW_INV "inv" #define K8_KW_MASK "mask" #define K8_KW_OS "os" #define K8_KW_USR "usr" static int k8_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; int n; uint32_t count; uint64_t evmask; const struct pmc_masks *pm, *pmask; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); pmc_config->pm_md.pm_amd.pm_amd_config = 0; pmask = NULL; evmask = 0; #define __K8SETMASK(M) pmask = k8_mask_##M /* setup parsing tables */ switch (pe) { case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: __K8SETMASK(fdfo); break; case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD: __K8SETMASK(lsrl); break; case PMC_EV_K8_LS_LOCKED_OPERATION: __K8SETMASK(llo); break; case PMC_EV_K8_DC_REFILL_FROM_L2: case PMC_EV_K8_DC_REFILL_FROM_SYSTEM: case PMC_EV_K8_DC_COPYBACK: __K8SETMASK(dc); break; case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR: __K8SETMASK(dobee); break; case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS: __K8SETMASK(ddpi); break; case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: __K8SETMASK(dabl); break; case PMC_EV_K8_BU_INTERNAL_L2_REQUEST: __K8SETMASK(bilr); break; case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS: __K8SETMASK(bfrlm); break; case PMC_EV_K8_BU_FILL_INTO_L2: __K8SETMASK(bfil); break; case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: __K8SETMASK(frfi); break; case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: __K8SETMASK(frfdoi); break; case PMC_EV_K8_FR_FPU_EXCEPTIONS: __K8SETMASK(ffe); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT: __K8SETMASK(nmcpae); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND: __K8SETMASK(nmct); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION: __K8SETMASK(nmcbs); break; case PMC_EV_K8_NB_SIZED_COMMANDS: __K8SETMASK(nsc); break; case PMC_EV_K8_NB_PROBE_RESULT: __K8SETMASK(npr); break; case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH: case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH: case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH: __K8SETMASK(nhbb); break; default: break; /* no options defined */ } while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); count = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_caps |= PMC_CAP_THRESHOLD; pmc_config->pm_md.pm_amd.pm_amd_config |= AMD_PMC_TO_COUNTER(count); } else if (KWMATCH(p, K8_KW_EDGE)) { pmc_config->pm_caps |= PMC_CAP_EDGE; } else if (KWMATCH(p, K8_KW_INV)) { pmc_config->pm_caps |= PMC_CAP_INVERT; } else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) { if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0) return (-1); pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } else if (KWMATCH(p, K8_KW_OS)) { pmc_config->pm_caps |= PMC_CAP_SYSTEM; } else if (KWMATCH(p, K8_KW_USR)) { pmc_config->pm_caps |= PMC_CAP_USER; } else return (-1); } /* other post processing */ switch (pe) { case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED: case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS: case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: case PMC_EV_K8_FR_FPU_EXCEPTIONS: /* XXX only available in rev B and later */ break; case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: /* XXX only available in rev C and later */ break; case PMC_EV_K8_LS_LOCKED_OPERATION: /* XXX CPU Rev A,B evmask is to be zero */ if (evmask & (evmask - 1)) /* > 1 bit set */ return (-1); if (evmask == 0) { evmask = 0x01; /* Rev C and later: #instrs */ pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } break; default: if (evmask == 0 && pmask != NULL) { for (pm = pmask; pm->pm_name; pm++) evmask |= pm->pm_value; pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } } if (pmc_config->pm_caps & PMC_CAP_QUALIFIER) pmc_config->pm_md.pm_amd.pm_amd_config = AMD_PMC_TO_UNITMASK(evmask); return (0); } #endif #if defined(__i386__) || defined(__amd64__) static int tsc_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { if (pe != PMC_EV_TSC_TSC) return (-1); /* TSC events must be unqualified. */ if (ctrspec && *ctrspec != '\0') return (-1); pmc_config->pm_md.pm_amd.pm_amd_config = 0; pmc_config->pm_caps |= PMC_CAP_READ; return (0); } #endif static struct pmc_event_alias generic_aliases[] = { EV_ALIAS("instructions", "SOFT-CLOCK.HARD"), EV_ALIAS(NULL, NULL) }; static int soft_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { (void)ctrspec; (void)pmc_config; if ((int)pe < PMC_EV_SOFT_FIRST || (int)pe > PMC_EV_SOFT_LAST) return (-1); pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); return (0); } #if defined(__arm__) static struct pmc_event_alias cortex_a8_aliases[] = { EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a9_aliases[] = { EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS(NULL, NULL) }; static int armv7_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif #if defined(__aarch64__) static struct pmc_event_alias cortex_a53_aliases[] = { EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a57_aliases[] = { EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a76_aliases[] = { EV_ALIAS(NULL, NULL) }; static int arm64_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *p; while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWMATCH(p, "os")) pmc_config->pm_caps |= PMC_CAP_SYSTEM; else if (KWMATCH(p, "usr")) pmc_config->pm_caps |= PMC_CAP_USER; else return (-1); } return (0); } static int cmn600_pmu_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { uint32_t nodeid, occupancy, xpport, xpchannel; char *e, *p, *q; unsigned int i; char *xpport_names[] = { "East", "West", "North", "South", "devport0", "devport1" }; char *xpchannel_names[] = { "REQ", "RSP", "SNP", "DAT" }; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); pmc_config->pm_caps |= PMC_CAP_SYSTEM; pmc_config->pm_md.pm_cmn600.pma_cmn600_config = 0; /* * CMN600 extra fields: * * nodeid - node coordinates x[2-3],y[2-3],p[1],s[2] * width of x and y fields depend on matrix size. * * occupancy - numeric value to select desired filter. * * xpport - East, West, North, South, devport0, devport1 (or 0, 1, ..., 5) * * xpchannel - REQ, RSP, SNP, DAT (or 0, 1, 2, 3) */ while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWPREFIXMATCH(p, "nodeid=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); nodeid = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_md.pm_cmn600.pma_cmn600_nodeid |= nodeid; } else if (KWPREFIXMATCH(p, "occupancy=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); occupancy = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_md.pm_cmn600.pma_cmn600_occupancy = occupancy; } else if (KWPREFIXMATCH(p, "xpport=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); xpport = strtol(q, &e, 0); if (e == q || *e != '\0') { for (i = 0; i < nitems(xpport_names); i++) { if (strcasecmp(xpport_names[i], q) == 0) { xpport = i; break; } } if (i == nitems(xpport_names)) return (-1); } pmc_config->pm_md.pm_cmn600.pma_cmn600_config |= xpport << 2; } else if (KWPREFIXMATCH(p, "xpchannel=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); xpchannel = strtol(q, &e, 0); if (e == q || *e != '\0') { for (i = 0; i < nitems(xpchannel_names); i++) { if (strcasecmp(xpchannel_names[i], q) == 0) { xpchannel = i; break; } } if (i == nitems(xpchannel_names)) return (-1); } pmc_config->pm_md.pm_cmn600.pma_cmn600_config |= xpchannel << 5; } else return (-1); } return (0); } static int dmc620_pmu_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; uint64_t match, mask; uint32_t count; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); pmc_config->pm_caps |= PMC_CAP_SYSTEM; pmc_config->pm_md.pm_dmc620.pm_dmc620_config = 0; while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWPREFIXMATCH(p, "count=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); count = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_caps |= PMC_CAP_THRESHOLD; pmc_config->pm_md.pm_dmc620.pm_dmc620_config |= count; } else if (KWMATCH(p, "inv")) { pmc_config->pm_caps |= PMC_CAP_INVERT; } else if (KWPREFIXMATCH(p, "match=")) { match = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_caps |= PMC_CAP_QUALIFIER; pmc_config->pm_md.pm_dmc620.pm_dmc620_match = match; } else if (KWPREFIXMATCH(p, "mask=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); mask = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_md.pm_dmc620.pm_dmc620_mask = mask; pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } else return (-1); } return (0); } #endif #if defined(__powerpc__) static struct pmc_event_alias ppc7450_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("branches", "BRANCHES_COMPLETED"), EV_ALIAS("branch-mispredicts", "MISPREDICTED_BRANCHES"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias ppc970_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("cycles", "CYCLES"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias e500_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("cycles", "CYCLES"), EV_ALIAS(NULL, NULL) }; #define POWERPC_KW_OS "os" #define POWERPC_KW_USR "usr" #define POWERPC_KW_ANYTHREAD "anythread" static int powerpc_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { char *p; (void) pe; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWMATCH(p, POWERPC_KW_OS)) pmc_config->pm_caps |= PMC_CAP_SYSTEM; else if (KWMATCH(p, POWERPC_KW_USR)) pmc_config->pm_caps |= PMC_CAP_USER; else if (KWMATCH(p, POWERPC_KW_ANYTHREAD)) pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM); else return (-1); } return (0); } #endif /* __powerpc__ */ /* * Match an event name `name' with its canonical form. * * Matches are case insensitive and spaces, periods, underscores and * hyphen characters are considered to match each other. * * Returns 1 for a match, 0 otherwise. */ static int pmc_match_event_name(const char *name, const char *canonicalname) { int cc, nc; const unsigned char *c, *n; c = (const unsigned char *) canonicalname; n = (const unsigned char *) name; for (; (nc = *n) && (cc = *c); n++, c++) { if ((nc == ' ' || nc == '_' || nc == '-' || nc == '.') && (cc == ' ' || cc == '_' || cc == '-' || cc == '.')) continue; if (toupper(nc) == toupper(cc)) continue; return (0); } if (*n == '\0' && *c == '\0') return (1); return (0); } /* * Match an event name against all the event named supported by a * PMC class. * * Returns an event descriptor pointer on match or NULL otherwise. */ static const struct pmc_event_descr * pmc_match_event_class(const char *name, const struct pmc_class_descr *pcd) { size_t n; const struct pmc_event_descr *ev; ev = pcd->pm_evc_event_table; for (n = 0; n < pcd->pm_evc_event_table_size; n++, ev++) if (pmc_match_event_name(name, ev->pm_ev_name)) return (ev); return (NULL); } /* * API entry points */ int pmc_allocate(const char *ctrspec, enum pmc_mode mode, uint32_t flags, int cpu, pmc_id_t *pmcid, uint64_t count) { size_t n; int retval; char *r, *spec_copy; const char *ctrname; const struct pmc_event_descr *ev; const struct pmc_event_alias *alias; struct pmc_op_pmcallocate pmc_config; const struct pmc_class_descr *pcd; spec_copy = NULL; retval = -1; if (mode != PMC_MODE_SS && mode != PMC_MODE_TS && mode != PMC_MODE_SC && mode != PMC_MODE_TC) { errno = EINVAL; goto out; } bzero(&pmc_config, sizeof(pmc_config)); pmc_config.pm_cpu = cpu; pmc_config.pm_mode = mode; pmc_config.pm_flags = flags; pmc_config.pm_count = count; if (PMC_IS_SAMPLING_MODE(mode)) pmc_config.pm_caps |= PMC_CAP_INTERRUPT; /* * Try to pull the raw event ID directly from the pmu-events table. If * this is unsupported on the platform, or the event is not found, * continue with searching the regular event tables. */ r = spec_copy = strdup(ctrspec); ctrname = strsep(&r, ","); if (pmc_pmu_enabled()) { - if (pmc_pmu_pmcallocate(ctrname, &pmc_config) == 0) { - /* - * XXX: pmclog_get_event exploits this to disambiguate - * PMU from PMC event codes in PMCALLOCATE events. - */ - assert(pmc_config.pm_ev < PMC_EVENT_FIRST); + if (pmc_pmu_pmcallocate(ctrname, &pmc_config) == 0) goto found; - } } free(spec_copy); spec_copy = NULL; /* replace an event alias with the canonical event specifier */ if (pmc_mdep_event_aliases) for (alias = pmc_mdep_event_aliases; alias->pm_alias; alias++) if (!strcasecmp(ctrspec, alias->pm_alias)) { spec_copy = strdup(alias->pm_spec); break; } if (spec_copy == NULL) spec_copy = strdup(ctrspec); r = spec_copy; ctrname = strsep(&r, ","); /* * If a explicit class prefix was given by the user, restrict the * search for the event to the specified PMC class. */ ev = NULL; for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) { pcd = pmc_class_table[n]; if (pcd != NULL && strncasecmp(ctrname, pcd->pm_evc_name, pcd->pm_evc_name_size) == 0) { if ((ev = pmc_match_event_class(ctrname + pcd->pm_evc_name_size, pcd)) == NULL) { errno = EINVAL; goto out; } break; } } /* * Otherwise, search for this event in all compatible PMC * classes. */ for (n = 0; ev == NULL && n < PMC_CLASS_TABLE_SIZE; n++) { pcd = pmc_class_table[n]; if (pcd != NULL) ev = pmc_match_event_class(ctrname, pcd); } if (ev == NULL) { errno = EINVAL; goto out; } pmc_config.pm_ev = ev->pm_ev_code; pmc_config.pm_class = pcd->pm_evc_class; if (pcd->pm_evc_allocate_pmc(ev->pm_ev_code, r, &pmc_config) < 0) { errno = EINVAL; goto out; } found: if (PMC_CALL(PMC_OP_PMCALLOCATE, &pmc_config) == 0) { *pmcid = pmc_config.pm_pmcid; retval = 0; } out: if (spec_copy) free(spec_copy); return (retval); } int pmc_attach(pmc_id_t pmc, pid_t pid) { struct pmc_op_pmcattach pmc_attach_args; pmc_attach_args.pm_pmc = pmc; pmc_attach_args.pm_pid = pid; return (PMC_CALL(PMC_OP_PMCATTACH, &pmc_attach_args)); } int pmc_capabilities(pmc_id_t pmcid, uint32_t *caps) { unsigned int i; enum pmc_class cl; cl = PMC_ID_TO_CLASS(pmcid); for (i = 0; i < cpu_info.pm_nclass; i++) if (cpu_info.pm_classes[i].pm_class == cl) { *caps = cpu_info.pm_classes[i].pm_caps; return (0); } errno = EINVAL; return (-1); } int pmc_configure_logfile(int fd) { struct pmc_op_configurelog cla; cla.pm_flags = 0; cla.pm_logfd = fd; if (PMC_CALL(PMC_OP_CONFIGURELOG, &cla) < 0) return (-1); return (0); } int pmc_cpuinfo(const struct pmc_cpuinfo **pci) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } *pci = &cpu_info; return (0); } int pmc_detach(pmc_id_t pmc, pid_t pid) { struct pmc_op_pmcattach pmc_detach_args; pmc_detach_args.pm_pmc = pmc; pmc_detach_args.pm_pid = pid; return (PMC_CALL(PMC_OP_PMCDETACH, &pmc_detach_args)); } int pmc_disable(int cpu, int pmc) { struct pmc_op_pmcadmin ssa; ssa.pm_cpu = cpu; ssa.pm_pmc = pmc; ssa.pm_state = PMC_STATE_DISABLED; return (PMC_CALL(PMC_OP_PMCADMIN, &ssa)); } int pmc_enable(int cpu, int pmc) { struct pmc_op_pmcadmin ssa; ssa.pm_cpu = cpu; ssa.pm_pmc = pmc; ssa.pm_state = PMC_STATE_FREE; return (PMC_CALL(PMC_OP_PMCADMIN, &ssa)); } /* * Return a list of events known to a given PMC class. 'cl' is the * PMC class identifier, 'eventnames' is the returned list of 'const * char *' pointers pointing to the names of the events. 'nevents' is * the number of event name pointers returned. * * The space for 'eventnames' is allocated using malloc(3). The caller * is responsible for freeing this space when done. */ int pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, int *nevents) { int count; const char **names; const struct pmc_event_descr *ev; switch (cl) { case PMC_CLASS_IAF: ev = iaf_event_table; count = PMC_EVENT_TABLE_SIZE(iaf); break; case PMC_CLASS_TSC: ev = tsc_event_table; count = PMC_EVENT_TABLE_SIZE(tsc); break; case PMC_CLASS_K8: ev = k8_event_table; count = PMC_EVENT_TABLE_SIZE(k8); break; case PMC_CLASS_ARMV7: switch (cpu_info.pm_cputype) { default: case PMC_CPU_ARMV7_CORTEX_A8: ev = cortex_a8_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: ev = cortex_a9_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a9); break; } break; case PMC_CLASS_ARMV8: switch (cpu_info.pm_cputype) { default: case PMC_CPU_ARMV8_CORTEX_A53: ev = cortex_a53_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: ev = cortex_a57_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a57); break; case PMC_CPU_ARMV8_CORTEX_A76: ev = cortex_a76_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a76); break; } break; case PMC_CLASS_CMN600_PMU: ev = cmn600_pmu_event_table; count = PMC_EVENT_TABLE_SIZE(cmn600_pmu); break; case PMC_CLASS_DMC620_PMU_CD2: ev = dmc620_pmu_cd2_event_table; count = PMC_EVENT_TABLE_SIZE(dmc620_pmu_cd2); break; case PMC_CLASS_DMC620_PMU_C: ev = dmc620_pmu_c_event_table; count = PMC_EVENT_TABLE_SIZE(dmc620_pmu_c); break; case PMC_CLASS_PPC7450: ev = ppc7450_event_table; count = PMC_EVENT_TABLE_SIZE(ppc7450); break; case PMC_CLASS_PPC970: ev = ppc970_event_table; count = PMC_EVENT_TABLE_SIZE(ppc970); break; case PMC_CLASS_E500: ev = e500_event_table; count = PMC_EVENT_TABLE_SIZE(e500); break; case PMC_CLASS_SOFT: ev = soft_event_table; count = soft_event_info.pm_nevent; break; default: errno = EINVAL; return (-1); } if ((names = malloc(count * sizeof(const char *))) == NULL) return (-1); *eventnames = names; *nevents = count; for (;count--; ev++, names++) *names = ev->pm_ev_name; return (0); } int pmc_flush_logfile(void) { return (PMC_CALL(PMC_OP_FLUSHLOG, 0)); } int pmc_close_logfile(void) { return (PMC_CALL(PMC_OP_CLOSELOG, 0)); } int pmc_get_driver_stats(struct pmc_driverstats *ds) { struct pmc_op_getdriverstats gms; if (PMC_CALL(PMC_OP_GETDRIVERSTATS, &gms) < 0) return (-1); /* copy out fields in the current userland<->library interface */ ds->pm_intr_ignored = gms.pm_intr_ignored; ds->pm_intr_processed = gms.pm_intr_processed; ds->pm_intr_bufferfull = gms.pm_intr_bufferfull; ds->pm_syscalls = gms.pm_syscalls; ds->pm_syscall_errors = gms.pm_syscall_errors; ds->pm_buffer_requests = gms.pm_buffer_requests; ds->pm_buffer_requests_failed = gms.pm_buffer_requests_failed; ds->pm_log_sweeps = gms.pm_log_sweeps; return (0); } int pmc_get_msr(pmc_id_t pmc, uint32_t *msr) { struct pmc_op_getmsr gm; gm.pm_pmcid = pmc; if (PMC_CALL(PMC_OP_PMCGETMSR, &gm) < 0) return (-1); *msr = gm.pm_msr; return (0); } int pmc_init(void) { int error, pmc_mod_id; unsigned int n; uint32_t abi_version; struct module_stat pmc_modstat; struct pmc_op_getcpuinfo op_cpu_info; if (pmc_syscall != -1) /* already inited */ return (0); /* retrieve the system call number from the KLD */ if ((pmc_mod_id = modfind(PMC_MODULE_NAME)) < 0) return (-1); pmc_modstat.version = sizeof(struct module_stat); if ((error = modstat(pmc_mod_id, &pmc_modstat)) < 0) return (-1); pmc_syscall = pmc_modstat.data.intval; /* check the kernel module's ABI against our compiled-in version */ abi_version = PMC_VERSION; if (PMC_CALL(PMC_OP_GETMODULEVERSION, &abi_version) < 0) return (pmc_syscall = -1); /* ignore patch & minor numbers for the comparison */ if ((abi_version & 0xFF000000) != (PMC_VERSION & 0xFF000000)) { errno = EPROGMISMATCH; return (pmc_syscall = -1); } bzero(&op_cpu_info, sizeof(op_cpu_info)); if (PMC_CALL(PMC_OP_GETCPUINFO, &op_cpu_info) < 0) return (pmc_syscall = -1); cpu_info.pm_cputype = op_cpu_info.pm_cputype; cpu_info.pm_ncpu = op_cpu_info.pm_ncpu; cpu_info.pm_npmc = op_cpu_info.pm_npmc; cpu_info.pm_nclass = op_cpu_info.pm_nclass; for (n = 0; n < op_cpu_info.pm_nclass; n++) memcpy(&cpu_info.pm_classes[n], &op_cpu_info.pm_classes[n], sizeof(cpu_info.pm_classes[n])); pmc_class_table = calloc(PMC_CLASS_TABLE_SIZE, sizeof(struct pmc_class_descr *)); if (pmc_class_table == NULL) return (-1); /* * Get soft events list. */ soft_event_info.pm_class = PMC_CLASS_SOFT; if (PMC_CALL(PMC_OP_GETDYNEVENTINFO, &soft_event_info) < 0) return (pmc_syscall = -1); /* Map soft events to static list. */ for (n = 0; n < soft_event_info.pm_nevent; n++) { soft_event_table[n].pm_ev_name = soft_event_info.pm_events[n].pm_ev_name; soft_event_table[n].pm_ev_code = soft_event_info.pm_events[n].pm_ev_code; } soft_class_table_descr.pm_evc_event_table_size = \ soft_event_info.pm_nevent; soft_class_table_descr.pm_evc_event_table = \ soft_event_table; /* * Fill in the class table. */ n = 0; for (unsigned i = 0; i < PMC_CLASS_TABLE_SIZE; i++) { switch (cpu_info.pm_classes[i].pm_class) { #if defined(__amd64__) || defined(__i386__) case PMC_CLASS_TSC: pmc_class_table[n++] = &tsc_class_table_descr; break; case PMC_CLASS_K8: pmc_class_table[n++] = &k8_class_table_descr; break; #endif case PMC_CLASS_SOFT: pmc_class_table[n++] = &soft_class_table_descr; break; #if defined(__arm__) case PMC_CLASS_ARMV7: switch (cpu_info.pm_cputype) { case PMC_CPU_ARMV7_CORTEX_A8: pmc_class_table[n++] = &cortex_a8_class_table_descr; break; case PMC_CPU_ARMV7_CORTEX_A9: pmc_class_table[n++] = &cortex_a9_class_table_descr; break; default: errno = ENXIO; return (pmc_syscall = -1); } break; #endif #if defined(__aarch64__) case PMC_CLASS_ARMV8: switch (cpu_info.pm_cputype) { case PMC_CPU_ARMV8_CORTEX_A53: pmc_class_table[n++] = &cortex_a53_class_table_descr; break; case PMC_CPU_ARMV8_CORTEX_A57: pmc_class_table[n++] = &cortex_a57_class_table_descr; break; case PMC_CPU_ARMV8_CORTEX_A76: pmc_class_table[n++] = &cortex_a76_class_table_descr; break; default: errno = ENXIO; return (pmc_syscall = -1); } break; case PMC_CLASS_DMC620_PMU_CD2: pmc_class_table[n++] = &dmc620_pmu_cd2_class_table_descr; break; case PMC_CLASS_DMC620_PMU_C: pmc_class_table[n++] = &dmc620_pmu_c_class_table_descr; break; case PMC_CLASS_CMN600_PMU: pmc_class_table[n++] = &cmn600_pmu_class_table_descr; break; #endif #if defined(__powerpc__) case PMC_CLASS_PPC7450: pmc_class_table[n++] = &ppc7450_class_table_descr; break; case PMC_CLASS_PPC970: pmc_class_table[n++] = &ppc970_class_table_descr; break; case PMC_CLASS_E500: pmc_class_table[n++] = &e500_class_table_descr; break; #endif default: #if defined(DEBUG) printf("pm_class: 0x%x\n", cpu_info.pm_classes[i].pm_class); #endif break; } } #define PMC_MDEP_INIT(C) pmc_mdep_event_aliases = C##_aliases /* Configure the event name parser. */ switch (cpu_info.pm_cputype) { #if defined(__amd64__) || defined(__i386__) case PMC_CPU_AMD_K8: PMC_MDEP_INIT(k8); break; #endif case PMC_CPU_GENERIC: PMC_MDEP_INIT(generic); break; #if defined(__arm__) case PMC_CPU_ARMV7_CORTEX_A8: PMC_MDEP_INIT(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: PMC_MDEP_INIT(cortex_a9); break; #endif #if defined(__aarch64__) case PMC_CPU_ARMV8_CORTEX_A53: PMC_MDEP_INIT(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: PMC_MDEP_INIT(cortex_a57); break; case PMC_CPU_ARMV8_CORTEX_A76: PMC_MDEP_INIT(cortex_a76); break; #endif #if defined(__powerpc__) case PMC_CPU_PPC_7450: PMC_MDEP_INIT(ppc7450); break; case PMC_CPU_PPC_970: PMC_MDEP_INIT(ppc970); break; case PMC_CPU_PPC_E500: PMC_MDEP_INIT(e500); break; #endif default: /* * Some kind of CPU this version of the library knows nothing * about. This shouldn't happen since the abi version check * should have caught this. */ #if defined(__amd64__) || defined(__i386__) || defined(__powerpc64__) break; #endif errno = ENXIO; return (pmc_syscall = -1); } return (0); } const char * pmc_name_of_capability(enum pmc_caps cap) { int i; /* * 'cap' should have a single bit set and should be in * range. */ if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST || cap > PMC_CAP_LAST) { errno = EINVAL; return (NULL); } i = ffs(cap); return (pmc_capability_names[i - 1]); } const char * pmc_name_of_class(enum pmc_class pc) { size_t n; for (n = 0; n < PMC_TABLE_SIZE(pmc_class_names); n++) if (pc == pmc_class_names[n].pm_class) return (pmc_class_names[n].pm_name); errno = EINVAL; return (NULL); } const char * pmc_name_of_cputype(enum pmc_cputype cp) { size_t n; for (n = 0; n < PMC_TABLE_SIZE(pmc_cputype_names); n++) if (cp == pmc_cputype_names[n].pm_cputype) return (pmc_cputype_names[n].pm_name); errno = EINVAL; return (NULL); } const char * pmc_name_of_disposition(enum pmc_disp pd) { if ((int) pd >= PMC_DISP_FIRST && pd <= PMC_DISP_LAST) return (pmc_disposition_names[pd]); errno = EINVAL; return (NULL); } const char * _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) { const struct pmc_event_descr *ev, *evfence; ev = evfence = NULL; if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) { ev = k8_event_table; evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8); } else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) { switch (cpu) { case PMC_CPU_ARMV7_CORTEX_A8: ev = cortex_a8_event_table; evfence = cortex_a8_event_table + PMC_EVENT_TABLE_SIZE(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: ev = cortex_a9_event_table; evfence = cortex_a9_event_table + PMC_EVENT_TABLE_SIZE(cortex_a9); break; default: /* Unknown CPU type. */ break; } } else if (pe >= PMC_EV_ARMV8_FIRST && pe <= PMC_EV_ARMV8_LAST) { switch (cpu) { case PMC_CPU_ARMV8_CORTEX_A53: ev = cortex_a53_event_table; evfence = cortex_a53_event_table + PMC_EVENT_TABLE_SIZE(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: ev = cortex_a57_event_table; evfence = cortex_a57_event_table + PMC_EVENT_TABLE_SIZE(cortex_a57); break; case PMC_CPU_ARMV8_CORTEX_A76: ev = cortex_a76_event_table; evfence = cortex_a76_event_table + PMC_EVENT_TABLE_SIZE(cortex_a76); break; default: /* Unknown CPU type. */ break; } } else if (pe >= PMC_EV_CMN600_PMU_FIRST && pe <= PMC_EV_CMN600_PMU_LAST) { ev = cmn600_pmu_event_table; evfence = cmn600_pmu_event_table + PMC_EVENT_TABLE_SIZE(cmn600_pmu); } else if (pe >= PMC_EV_DMC620_PMU_CD2_FIRST && pe <= PMC_EV_DMC620_PMU_CD2_LAST) { ev = dmc620_pmu_cd2_event_table; evfence = dmc620_pmu_cd2_event_table + PMC_EVENT_TABLE_SIZE(dmc620_pmu_cd2); } else if (pe >= PMC_EV_DMC620_PMU_C_FIRST && pe <= PMC_EV_DMC620_PMU_C_LAST) { ev = dmc620_pmu_c_event_table; evfence = dmc620_pmu_c_event_table + PMC_EVENT_TABLE_SIZE(dmc620_pmu_c); } else if (pe >= PMC_EV_PPC7450_FIRST && pe <= PMC_EV_PPC7450_LAST) { ev = ppc7450_event_table; evfence = ppc7450_event_table + PMC_EVENT_TABLE_SIZE(ppc7450); } else if (pe >= PMC_EV_PPC970_FIRST && pe <= PMC_EV_PPC970_LAST) { ev = ppc970_event_table; evfence = ppc970_event_table + PMC_EVENT_TABLE_SIZE(ppc970); } else if (pe >= PMC_EV_E500_FIRST && pe <= PMC_EV_E500_LAST) { ev = e500_event_table; evfence = e500_event_table + PMC_EVENT_TABLE_SIZE(e500); } else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); } else if ((int)pe >= PMC_EV_SOFT_FIRST && (int)pe <= PMC_EV_SOFT_LAST) { ev = soft_event_table; evfence = soft_event_table + soft_event_info.pm_nevent; } for (; ev != evfence; ev++) if (pe == ev->pm_ev_code) return (ev->pm_ev_name); return (NULL); } const char * pmc_name_of_event(enum pmc_event pe) { const char *n; if ((n = _pmc_name_of_event(pe, cpu_info.pm_cputype)) != NULL) return (n); errno = EINVAL; return (NULL); } const char * pmc_name_of_mode(enum pmc_mode pm) { if ((int) pm >= PMC_MODE_FIRST && pm <= PMC_MODE_LAST) return (pmc_mode_names[pm]); errno = EINVAL; return (NULL); } const char * pmc_name_of_state(enum pmc_state ps) { if ((int) ps >= PMC_STATE_FIRST && ps <= PMC_STATE_LAST) return (pmc_state_names[ps]); errno = EINVAL; return (NULL); } int pmc_ncpu(void) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } return (cpu_info.pm_ncpu); } int pmc_npmc(int cpu) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) { errno = EINVAL; return (-1); } return (cpu_info.pm_npmc); } int pmc_pmcinfo(int cpu, struct pmc_pmcinfo **ppmci) { int nbytes, npmc; struct pmc_op_getpmcinfo *pmci; if ((npmc = pmc_npmc(cpu)) < 0) return (-1); nbytes = sizeof(struct pmc_op_getpmcinfo) + npmc * sizeof(struct pmc_info); if ((pmci = calloc(1, nbytes)) == NULL) return (-1); pmci->pm_cpu = cpu; if (PMC_CALL(PMC_OP_GETPMCINFO, pmci) < 0) { free(pmci); return (-1); } /* kernel<->library, library<->userland interfaces are identical */ *ppmci = (struct pmc_pmcinfo *) pmci; return (0); } int pmc_read(pmc_id_t pmc, pmc_value_t *value) { struct pmc_op_pmcrw pmc_read_op; pmc_read_op.pm_pmcid = pmc; pmc_read_op.pm_flags = PMC_F_OLDVALUE; pmc_read_op.pm_value = -1; if (PMC_CALL(PMC_OP_PMCRW, &pmc_read_op) < 0) return (-1); *value = pmc_read_op.pm_value; return (0); } int pmc_release(pmc_id_t pmc) { struct pmc_op_simple pmc_release_args; pmc_release_args.pm_pmcid = pmc; return (PMC_CALL(PMC_OP_PMCRELEASE, &pmc_release_args)); } int pmc_rw(pmc_id_t pmc, pmc_value_t newvalue, pmc_value_t *oldvaluep) { struct pmc_op_pmcrw pmc_rw_op; pmc_rw_op.pm_pmcid = pmc; pmc_rw_op.pm_flags = PMC_F_NEWVALUE | PMC_F_OLDVALUE; pmc_rw_op.pm_value = newvalue; if (PMC_CALL(PMC_OP_PMCRW, &pmc_rw_op) < 0) return (-1); *oldvaluep = pmc_rw_op.pm_value; return (0); } int pmc_set(pmc_id_t pmc, pmc_value_t value) { struct pmc_op_pmcsetcount sc; sc.pm_pmcid = pmc; sc.pm_count = value; if (PMC_CALL(PMC_OP_PMCSETCOUNT, &sc) < 0) return (-1); return (0); } int pmc_start(pmc_id_t pmc) { struct pmc_op_simple pmc_start_args; pmc_start_args.pm_pmcid = pmc; return (PMC_CALL(PMC_OP_PMCSTART, &pmc_start_args)); } int pmc_stop(pmc_id_t pmc) { struct pmc_op_simple pmc_stop_args; pmc_stop_args.pm_pmcid = pmc; return (PMC_CALL(PMC_OP_PMCSTOP, &pmc_stop_args)); } int pmc_width(pmc_id_t pmcid, uint32_t *width) { unsigned int i; enum pmc_class cl; cl = PMC_ID_TO_CLASS(pmcid); for (i = 0; i < cpu_info.pm_nclass; i++) if (cpu_info.pm_classes[i].pm_class == cl) { *width = cpu_info.pm_classes[i].pm_width; return (0); } errno = EINVAL; return (-1); } int pmc_write(pmc_id_t pmc, pmc_value_t value) { struct pmc_op_pmcrw pmc_write_op; pmc_write_op.pm_pmcid = pmc; pmc_write_op.pm_flags = PMC_F_NEWVALUE; pmc_write_op.pm_value = value; return (PMC_CALL(PMC_OP_PMCRW, &pmc_write_op)); } int pmc_writelog(uint32_t userdata) { struct pmc_op_writelog wl; wl.pm_userdata = userdata; return (PMC_CALL(PMC_OP_WRITELOG, &wl)); } diff --git a/lib/libpmc/libpmc_pmu_util.c b/lib/libpmc/libpmc_pmu_util.c index 772dec7a9d53..fa2e76e8d026 100644 --- a/lib/libpmc/libpmc_pmu_util.c +++ b/lib/libpmc/libpmc_pmu_util.c @@ -1,684 +1,684 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018, Matthew Macy * Copyright (c) 2021, The FreeBSD Foundation * * Portions of this software were developed by Mitchell Horne * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pmu-events/pmu-events.h" struct pmu_alias { const char *pa_alias; const char *pa_name; }; #if defined(__amd64__) || defined(__i386__) typedef enum { PMU_INVALID, PMU_INTEL, PMU_AMD, } pmu_mfr_t; static struct pmu_alias pmu_intel_alias_table[] = { {"UNHALTED_CORE_CYCLES", "cpu_clk_unhalted.thread"}, {"UNHALTED-CORE-CYCLES", "cpu_clk_unhalted.thread"}, {"LLC_MISSES", "LONGEST_LAT_CACHE.MISS"}, {"LLC-MISSES", "LONGEST_LAT_CACHE.MISS"}, {"LLC_REFERENCE", "LONGEST_LAT_CACHE.REFERENCE"}, {"LLC-REFERENCE", "LONGEST_LAT_CACHE.REFERENCE"}, {"LLC_MISS_RHITM", "mem_load_l3_miss_retired.remote_hitm"}, {"LLC-MISS-RHITM", "mem_load_l3_miss_retired.remote_hitm"}, {"RESOURCE_STALL", "RESOURCE_STALLS.ANY"}, {"RESOURCE_STALLS_ANY", "RESOURCE_STALLS.ANY"}, {"BRANCH_INSTRUCTION_RETIRED", "BR_INST_RETIRED.ALL_BRANCHES"}, {"BRANCH-INSTRUCTION-RETIRED", "BR_INST_RETIRED.ALL_BRANCHES"}, {"BRANCH_MISSES_RETIRED", "BR_MISP_RETIRED.ALL_BRANCHES"}, {"BRANCH-MISSES-RETIRED", "BR_MISP_RETIRED.ALL_BRANCHES"}, {"unhalted-cycles", "cpu_clk_unhalted.thread"}, {"instructions", "inst_retired.any"}, {"branch-mispredicts", "br_misp_retired.all_branches"}, {"branches", "br_inst_retired.all_branches"}, {"interrupts", "hw_interrupts.received"}, {"ic-misses", "frontend_retired.l1i_miss"}, {NULL, NULL}, }; static struct pmu_alias pmu_amd_alias_table[] = { {"UNHALTED_CORE_CYCLES", "ls_not_halted_cyc"}, {"UNHALTED-CORE-CYCLES", "ls_not_halted_cyc"}, {"LLC_MISSES", "l3_comb_clstr_state.request_miss"}, {"LLC-MISSES", "l3_comb_clstr_state.request_miss"}, {"LLC_REFERENCE", "l3_request_g1.caching_l3_cache_accesses"}, {"LLC-REFERENCE", "l3_request_g1.caching_l3_cache_accesses"}, {"BRANCH_INSTRUCTION_RETIRED", "ex_ret_brn"}, {"BRANCH-INSTRUCTION-RETIRED", "ex_ret_brn"}, {"BRANCH_MISSES_RETIRED", "ex_ret_brn_misp"}, {"BRANCH-MISSES-RETIRED", "ex_ret_brn_misp"}, {"unhalted-cycles", "ls_not_halted_cyc"}, {"instructions", "ex_ret_instr",}, {"branch-mispredicts", "ex_ret_brn_misp"}, {"branches", "ex_ret_brn"}, {"interrupts", "ls_int_taken"}, /* Not on amdzen1 */ {NULL, NULL}, }; static pmu_mfr_t pmu_events_mfr(void) { char buf[PMC_CPUID_LEN]; size_t s = sizeof(buf); pmu_mfr_t mfr; if (sysctlbyname("kern.hwpmc.cpuid", buf, &s, (void *)NULL, 0) == -1) return (PMU_INVALID); if (strcasestr(buf, "AuthenticAMD") != NULL || strcasestr(buf, "HygonGenuine") != NULL) mfr = PMU_AMD; else if (strcasestr(buf, "GenuineIntel") != NULL) mfr = PMU_INTEL; else mfr = PMU_INVALID; return (mfr); } /* * The Intel fixed mode counters are: * "inst_retired.any", * "cpu_clk_unhalted.thread", * "cpu_clk_unhalted.thread_any", * "cpu_clk_unhalted.ref_tsc", * */ static const char * pmu_alias_get(const char *name) { pmu_mfr_t mfr; struct pmu_alias *pa; struct pmu_alias *pmu_alias_table; if ((mfr = pmu_events_mfr()) == PMU_INVALID) return (name); if (mfr == PMU_AMD) pmu_alias_table = pmu_amd_alias_table; else if (mfr == PMU_INTEL) pmu_alias_table = pmu_intel_alias_table; else return (name); for (pa = pmu_alias_table; pa->pa_alias != NULL; pa++) if (strcasecmp(name, pa->pa_alias) == 0) return (pa->pa_name); return (name); } #elif defined(__powerpc64__) static const char * pmu_alias_get(const char *name) { return (name); } #elif defined(__aarch64__) static struct pmu_alias pmu_armv8_alias_table[] = { {"UNHALTED_CORE_CYCLES", "CPU_CYCLES"}, {"UNHALTED-CORE-CYCLES", "CPU_CYCLES"}, {"LLC_MISSES", "LL_CACHE_MISS_RD"}, {"LLC-MISSES", "LL_CACHE_MISS_RD"}, {"LLC_REFERENCE", "LL_CACHE_RD"}, {"LLC-REFERENCE", "LL_CACHE_RD"}, {"BRANCH_INSTRUCTION_RETIRED", "BR_RETIRED"}, {"BRANCH-INSTRUCTION-RETIRED", "BR_RETIRED"}, {"BRANCH_MISSES_RETIRED", "BR_MIS_PRED_RETIRED"}, {"BRANCH-MISSES-RETIRED", "BR_MIS_PRED_RETIRED"}, {"unhalted-cycles", "CPU_CYCLES"}, {"instructions", "INST_RETIRED",}, {"branch-mispredicts", "BR_MIS_PRED_RETIRED"}, {"branches", "BR_RETIRED"}, {"interrupts", "EXC_IRQ"}, {NULL, NULL}, }; static const char * pmu_alias_get(const char *name) { struct pmu_alias *pa; for (pa = pmu_armv8_alias_table; pa->pa_alias != NULL; pa++) if (strcasecmp(name, pa->pa_alias) == 0) return (pa->pa_name); return (name); } #else static const char * pmu_alias_get(const char *name) { return (name); } #endif struct pmu_event_desc { uint64_t ped_period; uint64_t ped_offcore_rsp; uint64_t ped_l3_thread; uint64_t ped_l3_slice; uint32_t ped_event; uint32_t ped_frontend; uint32_t ped_ldlat; uint32_t ped_config1; int16_t ped_umask; uint8_t ped_cmask; uint8_t ped_any; uint8_t ped_inv; uint8_t ped_edge; uint8_t ped_fc_mask; uint8_t ped_ch_mask; }; static const struct pmu_events_map * pmu_events_map_get(const char *cpuid) { regex_t re; regmatch_t pmatch[1]; char buf[PMC_CPUID_LEN]; size_t s = sizeof(buf); int match; const struct pmu_events_map *pme; if (cpuid != NULL) { strlcpy(buf, cpuid, s); } else { if (sysctlbyname("kern.hwpmc.cpuid", buf, &s, (void *)NULL, 0) == -1) return (NULL); } for (pme = pmu_events_map; pme->cpuid != NULL; pme++) { if (regcomp(&re, pme->cpuid, REG_EXTENDED) != 0) { printf("regex '%s' failed to compile, ignoring\n", pme->cpuid); continue; } match = regexec(&re, buf, 1, pmatch, 0); regfree(&re); if (match == 0) { if (pmatch[0].rm_so == 0 && (buf[pmatch[0].rm_eo] == 0 || buf[pmatch[0].rm_eo] == '-')) return (pme); } } return (NULL); } static const struct pmu_event * pmu_event_get(const char *cpuid, const char *event_name, int *idx) { const struct pmu_events_map *pme; const struct pmu_event *pe; int i; if ((pme = pmu_events_map_get(cpuid)) == NULL) return (NULL); for (i = 0, pe = pme->table; pe->name || pe->desc || pe->event; pe++, i++) { if (pe->name == NULL) continue; if (strcasecmp(pe->name, event_name) == 0) { if (idx) *idx = i; return (pe); } } return (NULL); } int pmc_pmu_idx_get_by_event(const char *cpuid, const char *event) { int idx; const char *realname; realname = pmu_alias_get(event); if (pmu_event_get(cpuid, realname, &idx) == NULL) return (-1); return (idx); } const char * pmc_pmu_event_get_by_idx(const char *cpuid, int idx) { const struct pmu_events_map *pme; if ((pme = pmu_events_map_get(cpuid)) == NULL) return (NULL); assert(pme->table[idx].name); return (pme->table[idx].name); } static int pmu_parse_event(struct pmu_event_desc *ped, const char *eventin) { char *event; char *kvp, *key, *value, *r; char *debug; if ((event = strdup(eventin)) == NULL) return (ENOMEM); r = event; bzero(ped, sizeof(*ped)); ped->ped_period = DEFAULT_SAMPLE_COUNT; ped->ped_umask = -1; while ((kvp = strsep(&event, ",")) != NULL) { key = strsep(&kvp, "="); if (key == NULL) abort(); value = kvp; if (strcmp(key, "umask") == 0) ped->ped_umask = strtol(value, NULL, 16); else if (strcmp(key, "event") == 0) ped->ped_event = strtol(value, NULL, 16); else if (strcmp(key, "period") == 0) ped->ped_period = strtol(value, NULL, 10); else if (strcmp(key, "offcore_rsp") == 0) ped->ped_offcore_rsp = strtol(value, NULL, 16); else if (strcmp(key, "any") == 0) ped->ped_any = strtol(value, NULL, 10); else if (strcmp(key, "cmask") == 0) ped->ped_cmask = strtol(value, NULL, 10); else if (strcmp(key, "inv") == 0) ped->ped_inv = strtol(value, NULL, 10); else if (strcmp(key, "edge") == 0) ped->ped_edge = strtol(value, NULL, 10); else if (strcmp(key, "frontend") == 0) ped->ped_frontend = strtol(value, NULL, 16); else if (strcmp(key, "ldlat") == 0) ped->ped_ldlat = strtol(value, NULL, 16); else if (strcmp(key, "fc_mask") == 0) ped->ped_fc_mask = strtol(value, NULL, 16); else if (strcmp(key, "ch_mask") == 0) ped->ped_ch_mask = strtol(value, NULL, 16); else if (strcmp(key, "config1") == 0) ped->ped_config1 = strtol(value, NULL, 16); else if (strcmp(key, "l3_thread_mask") == 0) ped->ped_l3_thread = strtol(value, NULL, 16); else if (strcmp(key, "l3_slice_mask") == 0) ped->ped_l3_slice = strtol(value, NULL, 16); else { debug = getenv("PMUDEBUG"); if (debug != NULL && strcmp(debug, "true") == 0 && value != NULL) printf("unrecognized kvpair: %s:%s\n", key, value); } } free(r); return (0); } uint64_t pmc_pmu_sample_rate_get(const char *event_name) { const struct pmu_event *pe; struct pmu_event_desc ped; event_name = pmu_alias_get(event_name); if ((pe = pmu_event_get(NULL, event_name, NULL)) == NULL) return (DEFAULT_SAMPLE_COUNT); if (pe->event == NULL) return (DEFAULT_SAMPLE_COUNT); if (pmu_parse_event(&ped, pe->event)) return (DEFAULT_SAMPLE_COUNT); return (ped.ped_period); } int pmc_pmu_enabled(void) { return (pmu_events_map_get(NULL) != NULL); } void pmc_pmu_print_counters(const char *event_name) { const struct pmu_events_map *pme; const struct pmu_event *pe; struct pmu_event_desc ped; char *debug; int do_debug; debug = getenv("PMUDEBUG"); do_debug = 0; if (debug != NULL && strcmp(debug, "true") == 0) do_debug = 1; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (event_name != NULL && strcasestr(pe->name, event_name) == NULL) continue; printf("\t%s\n", pe->name); if (do_debug) pmu_parse_event(&ped, pe->event); } } void pmc_pmu_print_counter_desc(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) != NULL && pe->desc != NULL) printf("%s:\t%s\n", pe->name, pe->desc); } } void pmc_pmu_print_counter_desc_long(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) != NULL) { if (pe->long_desc != NULL) printf("%s:\n%s\n", pe->name, pe->long_desc); else if (pe->desc != NULL) printf("%s:\t%s\n", pe->name, pe->desc); } } } void pmc_pmu_print_counter_full(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get(NULL)) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) == NULL) continue; printf("name: %s\n", pe->name); if (pe->long_desc != NULL) printf("desc: %s\n", pe->long_desc); else if (pe->desc != NULL) printf("desc: %s\n", pe->desc); if (pe->event != NULL) printf("event: %s\n", pe->event); if (pe->topic != NULL) printf("topic: %s\n", pe->topic); if (pe->pmu != NULL) printf("pmu: %s\n", pe->pmu); if (pe->unit != NULL) printf("unit: %s\n", pe->unit); if (pe->perpkg != NULL) printf("perpkg: %s\n", pe->perpkg); if (pe->metric_expr != NULL) printf("metric_expr: %s\n", pe->metric_expr); if (pe->metric_name != NULL) printf("metric_name: %s\n", pe->metric_name); if (pe->metric_group != NULL) printf("metric_group: %s\n", pe->metric_group); } } #if defined(__amd64__) || defined(__i386__) static int pmc_pmu_amd_pmcallocate(const char *event_name, struct pmc_op_pmcallocate *pm, struct pmu_event_desc *ped) { struct pmc_md_amd_op_pmcallocate *amd; const struct pmu_event *pe; int idx = -1; amd = &pm->pm_md.pm_amd; if (ped->ped_umask > 0) { pm->pm_caps |= PMC_CAP_QUALIFIER; amd->pm_amd_config |= AMD_PMC_TO_UNITMASK(ped->ped_umask); } pm->pm_class = PMC_CLASS_K8; pe = pmu_event_get(NULL, event_name, &idx); if (strcmp("l3cache", pe->topic) == 0){ amd->pm_amd_config |= AMD_PMC_TO_EVENTMASK(ped->ped_event); amd->pm_amd_sub_class = PMC_AMD_SUB_CLASS_L3_CACHE; amd->pm_amd_config |= AMD_PMC_TO_L3SLICE(ped->ped_l3_slice); amd->pm_amd_config |= AMD_PMC_TO_L3CORE(ped->ped_l3_thread); } else if (strcmp("data fabric", pe->topic) == 0){ amd->pm_amd_config |= AMD_PMC_TO_EVENTMASK_DF(ped->ped_event); amd->pm_amd_sub_class = PMC_AMD_SUB_CLASS_DATA_FABRIC; } else{ amd->pm_amd_config |= AMD_PMC_TO_EVENTMASK(ped->ped_event); amd->pm_amd_sub_class = PMC_AMD_SUB_CLASS_CORE; if ((pm->pm_caps & (PMC_CAP_USER|PMC_CAP_SYSTEM)) == 0 || (pm->pm_caps & (PMC_CAP_USER|PMC_CAP_SYSTEM)) == (PMC_CAP_USER|PMC_CAP_SYSTEM)) amd->pm_amd_config |= (AMD_PMC_USR | AMD_PMC_OS); else if (pm->pm_caps & PMC_CAP_USER) amd->pm_amd_config |= AMD_PMC_USR; else if (pm->pm_caps & PMC_CAP_SYSTEM) amd->pm_amd_config |= AMD_PMC_OS; if (ped->ped_edge) amd->pm_amd_config |= AMD_PMC_EDGE; if (ped->ped_inv) amd->pm_amd_config |= AMD_PMC_INVERT; if (pm->pm_caps & PMC_CAP_INTERRUPT) amd->pm_amd_config |= AMD_PMC_INT; } return (0); } static int pmc_pmu_intel_pmcallocate(const char *event_name, struct pmc_op_pmcallocate *pm, struct pmu_event_desc *ped) { struct pmc_md_iap_op_pmcallocate *iap; iap = &pm->pm_md.pm_iap; if (strcasestr(event_name, "UNC_") == event_name || strcasestr(event_name, "uncore") != NULL) { pm->pm_class = PMC_CLASS_UCP; pm->pm_caps |= PMC_CAP_QUALIFIER; } else if (ped->ped_event == 0x0) { pm->pm_class = PMC_CLASS_IAF; } else { pm->pm_class = PMC_CLASS_IAP; pm->pm_caps |= PMC_CAP_QUALIFIER; } iap->pm_iap_config |= IAP_EVSEL(ped->ped_event); if (ped->ped_umask > 0) iap->pm_iap_config |= IAP_UMASK(ped->ped_umask); iap->pm_iap_config |= IAP_CMASK(ped->ped_cmask); iap->pm_iap_rsp = ped->ped_offcore_rsp; if ((pm->pm_caps & (PMC_CAP_USER|PMC_CAP_SYSTEM)) == 0 || (pm->pm_caps & (PMC_CAP_USER|PMC_CAP_SYSTEM)) == (PMC_CAP_USER|PMC_CAP_SYSTEM)) iap->pm_iap_config |= (IAP_USR | IAP_OS); else if (pm->pm_caps & PMC_CAP_USER) iap->pm_iap_config |= IAP_USR; else if (pm->pm_caps & PMC_CAP_SYSTEM) iap->pm_iap_config |= IAP_OS; if (ped->ped_edge) iap->pm_iap_config |= IAP_EDGE; if (ped->ped_any) iap->pm_iap_config |= IAP_ANY; if (ped->ped_inv) iap->pm_iap_config |= IAP_INV; if (pm->pm_caps & PMC_CAP_INTERRUPT) iap->pm_iap_config |= IAP_INT; return (0); } static int pmc_pmu_pmcallocate_md(const char *event_name, struct pmc_op_pmcallocate *pm) { const struct pmu_event *pe; struct pmu_event_desc ped; pmu_mfr_t mfr; int idx = -1; if ((mfr = pmu_events_mfr()) == PMU_INVALID) return (ENOENT); bzero(&pm->pm_md, sizeof(pm->pm_md)); pm->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); event_name = pmu_alias_get(event_name); if ((pe = pmu_event_get(NULL, event_name, &idx)) == NULL) return (ENOENT); assert(idx >= 0); pm->pm_ev = idx; if (pe->event == NULL) return (ENOENT); if (pmu_parse_event(&ped, pe->event)) return (ENOENT); if (mfr == PMU_INTEL) return (pmc_pmu_intel_pmcallocate(event_name, pm, &ped)); else return (pmc_pmu_amd_pmcallocate(event_name, pm, &ped)); } #elif defined(__powerpc64__) static int pmc_pmu_pmcallocate_md(const char *event_name, struct pmc_op_pmcallocate *pm) { const struct pmu_event *pe; struct pmu_event_desc ped; int idx = -1; bzero(&pm->pm_md, sizeof(pm->pm_md)); pm->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); event_name = pmu_alias_get(event_name); if ((pe = pmu_event_get(NULL, event_name, &idx)) == NULL) return (ENOENT); if (pe->event == NULL) return (ENOENT); if (pmu_parse_event(&ped, pe->event)) return (ENOENT); assert(ped.ped_event >= 0); pm->pm_ev = idx; pm->pm_md.pm_event = ped.ped_event; pm->pm_class = PMC_CLASS_POWER8; return (0); } #elif defined(__aarch64__) static int pmc_pmu_pmcallocate_md(const char *event_name, struct pmc_op_pmcallocate *pm) { const struct pmu_event *pe; struct pmu_event_desc ped; int idx = -1; event_name = pmu_alias_get(event_name); if ((pe = pmu_event_get(NULL, event_name, &idx)) == NULL) return (ENOENT); if (pe->event == NULL) return (ENOENT); if (pmu_parse_event(&ped, pe->event)) return (ENOENT); assert(idx >= 0); pm->pm_ev = idx; pm->pm_md.pm_md_config = ped.ped_event; - pm->pm_md.pm_md_flags |= PM_MD_RAW_EVENT; pm->pm_class = PMC_CLASS_ARMV8; pm->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); return (0); } #else static int pmc_pmu_pmcallocate_md(const char *e __unused, struct pmc_op_pmcallocate *p __unused) { return (EOPNOTSUPP); } #endif int pmc_pmu_pmcallocate(const char *event_name, struct pmc_op_pmcallocate *pm) { int error; error = pmc_pmu_pmcallocate_md(event_name, pm); if (error != 0) { /* Reset any changes. */ pm->pm_ev = 0; pm->pm_caps = 0; pm->pm_class = 0; return (error); } + pm->pm_flags |= PMC_F_EV_PMU; return (0); } diff --git a/lib/libpmc/pmclog.c b/lib/libpmc/pmclog.c index a21fe42c9947..3b1572baaa2c 100644 --- a/lib/libpmc/pmclog.c +++ b/lib/libpmc/pmclog.c @@ -1,614 +1,609 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpmcinternal.h" #define PMCLOG_BUFFER_SIZE 512*1024 /* * API NOTES * * The pmclog(3) API is oriented towards parsing an event stream in * "realtime", i.e., from an data source that may or may not preserve * record boundaries -- for example when the data source is elsewhere * on a network. The API allows data to be fed into the parser zero * or more bytes at a time. * * The state for a log file parser is maintained in a 'struct * pmclog_parse_state'. Parser invocations are done by calling * 'pmclog_read()'; this function will inform the caller when a * complete event is parsed. * * The parser first assembles a complete log file event in an internal * work area (see "ps_saved" below). Once a complete log file event * is read, the parser then parses it and converts it to an event * descriptor usable by the client. We could possibly avoid this two * step process by directly parsing the input log to set fields in the * event record. However the parser's state machine would get * insanely complicated, and this code is unlikely to be used in * performance critical paths. */ #define PMCLOG_HEADER_FROM_SAVED_STATE(PS) \ (* ((uint32_t *) &(PS)->ps_saved)) #define PMCLOG_INITIALIZE_READER(LE,A) LE = (uint32_t *) &(A) #define PMCLOG_SKIP32(LE) (LE)++ #define PMCLOG_READ32(LE,V) do { \ (V) = *(LE)++; \ } while (0) #define PMCLOG_READ64(LE,V) do { \ uint64_t _v; \ _v = (uint64_t) *(LE)++; \ _v |= ((uint64_t) *(LE)++) << 32; \ (V) = _v; \ } while (0) #define PMCLOG_READSTRING(LE,DST,LEN) strlcpy((DST), (char *) (LE), (LEN)) /* * Assemble a log record from '*len' octets starting from address '*data'. * Update 'data' and 'len' to reflect the number of bytes consumed. * * '*data' is potentially an unaligned address and '*len' octets may * not be enough to complete a event record. */ static enum pmclog_parser_state pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len) { int avail, copylen, recordsize, used; uint32_t h; const int HEADERSIZE = sizeof(uint32_t); char *src, *dst; if ((avail = *len) <= 0) return (ps->ps_state = PL_STATE_ERROR); src = *data; used = 0; if (ps->ps_state == PL_STATE_NEW_RECORD) ps->ps_svcount = 0; dst = (char *) &ps->ps_saved + ps->ps_svcount; switch (ps->ps_state) { case PL_STATE_NEW_RECORD: /* * Transitions: * * Case A: avail < headersize * -> 'expecting header' * * Case B: avail >= headersize * B.1: avail < recordsize * -> 'partial record' * B.2: avail >= recordsize * -> 'new record' */ copylen = avail < HEADERSIZE ? avail : HEADERSIZE; bcopy(src, dst, copylen); ps->ps_svcount = used = copylen; if (copylen < HEADERSIZE) { ps->ps_state = PL_STATE_EXPECTING_HEADER; goto done; } src += copylen; dst += copylen; h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); recordsize = PMCLOG_HEADER_TO_LENGTH(h); if (recordsize <= 0) goto error; if (recordsize <= avail) { /* full record available */ bcopy(src, dst, recordsize - copylen); ps->ps_svcount = used = recordsize; goto done; } /* header + a partial record is available */ bcopy(src, dst, avail - copylen); ps->ps_svcount = used = avail; ps->ps_state = PL_STATE_PARTIAL_RECORD; break; case PL_STATE_EXPECTING_HEADER: /* * Transitions: * * Case C: avail+saved < headersize * -> 'expecting header' * * Case D: avail+saved >= headersize * D.1: avail+saved < recordsize * -> 'partial record' * D.2: avail+saved >= recordsize * -> 'new record' * (see PARTIAL_RECORD handling below) */ if (avail + ps->ps_svcount < HEADERSIZE) { bcopy(src, dst, avail); ps->ps_svcount += avail; used = avail; break; } used = copylen = HEADERSIZE - ps->ps_svcount; bcopy(src, dst, copylen); src += copylen; dst += copylen; avail -= copylen; ps->ps_svcount += copylen; /*FALLTHROUGH*/ case PL_STATE_PARTIAL_RECORD: /* * Transitions: * * Case E: avail+saved < recordsize * -> 'partial record' * * Case F: avail+saved >= recordsize * -> 'new record' */ h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); recordsize = PMCLOG_HEADER_TO_LENGTH(h); if (recordsize <= 0) goto error; if (avail + ps->ps_svcount < recordsize) { copylen = avail; ps->ps_state = PL_STATE_PARTIAL_RECORD; } else { copylen = recordsize - ps->ps_svcount; ps->ps_state = PL_STATE_NEW_RECORD; } bcopy(src, dst, copylen); ps->ps_svcount += copylen; used += copylen; break; default: goto error; } done: *data += used; *len -= used; return ps->ps_state; error: ps->ps_state = PL_STATE_ERROR; return ps->ps_state; } /* * Get an event from the stream pointed to by '*data'. '*len' * indicates the number of bytes available to parse. Arguments * '*data' and '*len' are updated to indicate the number of bytes * consumed. */ static int pmclog_get_event(void *cookie, char **data, ssize_t *len, struct pmclog_ev *ev) { int evlen, pathlen; uint32_t h, *le, npc; enum pmclog_parser_state e; struct pmclog_parse_state *ps; struct pmclog_header *ph; ps = (struct pmclog_parse_state *) cookie; assert(ps->ps_state != PL_STATE_ERROR); if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) { ev->pl_state = PMCLOG_ERROR; printf("state error\n"); return -1; } if (e != PL_STATE_NEW_RECORD) { ev->pl_state = PMCLOG_REQUIRE_DATA; return -1; } PMCLOG_INITIALIZE_READER(le, ps->ps_saved); ev->pl_data = le; ph = (struct pmclog_header *)(uintptr_t)le; h = ph->pl_header; if (!PMCLOG_HEADER_CHECK_MAGIC(h)) { printf("bad magic\n"); ps->ps_state = PL_STATE_ERROR; ev->pl_state = PMCLOG_ERROR; return -1; } /* copy out the time stamp */ ev->pl_ts.tv_sec = ph->pl_tsc; le += sizeof(*ph)/4; evlen = PMCLOG_HEADER_TO_LENGTH(h); #define PMCLOG_GET_PATHLEN(P,E,TYPE) do { \ (P) = (E) - offsetof(struct TYPE, pl_pathname); \ if ((P) > PATH_MAX || (P) < 0) \ goto error; \ } while (0) #define PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do { \ (SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc)) \ / sizeof(uintfptr_t); \ } while (0); switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) { case PMCLOG_TYPE_CALLCHAIN: PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_tid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags); PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen); for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++) PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]); for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++) ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0; break; case PMCLOG_TYPE_CLOSELOG: ev->pl_state = PMCLOG_EOF; return (-1); case PMCLOG_TYPE_DROPNOTIFY: /* nothing to do */ break; case PMCLOG_TYPE_INITIALIZE: PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version); PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch); PMCLOG_READ64(le,ev->pl_u.pl_i.pl_tsc_freq); memcpy(&ev->pl_u.pl_i.pl_ts, le, sizeof(struct timespec)); le += sizeof(struct timespec)/4; PMCLOG_READSTRING(le, ev->pl_u.pl_i.pl_cpuid, PMC_CPUID_LEN); memcpy(ev->pl_u.pl_i.pl_cpuid, le, PMC_CPUID_LEN); ps->ps_cpuid = strdup(ev->pl_u.pl_i.pl_cpuid); ps->ps_version = ev->pl_u.pl_i.pl_version; ps->ps_arch = ev->pl_u.pl_i.pl_arch; ps->ps_initialized = 1; break; case PMCLOG_TYPE_MAP_IN: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in); PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid); PMCLOG_SKIP32(le); PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start); PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen); break; case PMCLOG_TYPE_MAP_OUT: PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid); PMCLOG_SKIP32(le); PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start); PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end); break; case PMCLOG_TYPE_PMCALLOCATE: PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event); PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags); PMCLOG_SKIP32(le); PMCLOG_READ64(le,ev->pl_u.pl_a.pl_rate); /* - * Could be either a PMC event code or a PMU event index; - * assume that their encodings don't overlap (i.e. no PMU event - * table is more than 0x1000 entries) to distinguish them here. - * Otherwise pmc_pmu_event_get_by_idx will go out of bounds if - * given a PMC event code when it knows about that CPU. - * - * XXX: Ideally we'd have user flags to give us that context. + * pl_event could contain either a PMC event code or a PMU + * event index. */ - if (ev->pl_u.pl_a.pl_event < PMC_EVENT_FIRST) + if ((ev->pl_u.pl_a.pl_flags & PMC_F_EV_PMU) != 0) ev->pl_u.pl_a.pl_evname = pmc_pmu_event_get_by_idx(ps->ps_cpuid, ev->pl_u.pl_a.pl_event); else if (ev->pl_u.pl_a.pl_event <= PMC_EVENT_LAST) ev->pl_u.pl_a.pl_evname = _pmc_name_of_event(ev->pl_u.pl_a.pl_event, ps->ps_arch); else ev->pl_u.pl_a.pl_evname = NULL; if (ev->pl_u.pl_a.pl_evname == NULL) { printf("unknown event\n"); goto error; } break; case PMCLOG_TYPE_PMCALLOCATEDYN: PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_event); PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_flags); PMCLOG_SKIP32(le); PMCLOG_READSTRING(le,ev->pl_u.pl_ad.pl_evname,PMC_NAME_MAX); break; case PMCLOG_TYPE_PMCATTACH: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach); PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid); PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen); break; case PMCLOG_TYPE_PMCDETACH: PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid); break; case PMCLOG_TYPE_PROCCSW: PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_tid); break; case PMCLOG_TYPE_PROCEXEC: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec); PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid); PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_baseaddr); PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_dynaddr); PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen); break; case PMCLOG_TYPE_PROCEXIT: PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid); PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value); break; case PMCLOG_TYPE_PROCFORK: PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid); PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid); break; case PMCLOG_TYPE_SYSEXIT: PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid); break; case PMCLOG_TYPE_USERDATA: PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata); break; case PMCLOG_TYPE_THR_CREATE: PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_tid); PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_flags); PMCLOG_SKIP32(le); memcpy(ev->pl_u.pl_tc.pl_tdname, le, MAXCOMLEN+1); break; case PMCLOG_TYPE_THR_EXIT: PMCLOG_READ32(le,ev->pl_u.pl_te.pl_tid); break; case PMCLOG_TYPE_PROC_CREATE: PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_flags); memcpy(ev->pl_u.pl_pc.pl_pcomm, le, MAXCOMLEN+1); break; default: /* unknown record type */ ps->ps_state = PL_STATE_ERROR; ev->pl_state = PMCLOG_ERROR; return (-1); } ev->pl_offset = (ps->ps_offset += evlen); ev->pl_count = (ps->ps_count += 1); ev->pl_len = evlen; ev->pl_state = PMCLOG_OK; return 0; error: ev->pl_state = PMCLOG_ERROR; ps->ps_state = PL_STATE_ERROR; return -1; } /* * Extract and return the next event from the byte stream. * * Returns 0 and sets the event's state to PMCLOG_OK in case an event * was successfully parsed. Otherwise this function returns -1 and * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if * a parse error was encountered. */ int pmclog_read(void *cookie, struct pmclog_ev *ev) { int retval; ssize_t nread; struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (ps->ps_state == PL_STATE_ERROR) { ev->pl_state = PMCLOG_ERROR; return -1; } /* * If there isn't enough data left for a new event try and get * more data. */ if (ps->ps_len == 0) { ev->pl_state = PMCLOG_REQUIRE_DATA; /* * If we have a valid file descriptor to read from, attempt * to read from that. This read may return with an error, * (which may be EAGAIN or other recoverable error), or * can return EOF. */ if (ps->ps_fd != PMCLOG_FD_NONE) { refill: nread = read(ps->ps_fd, ps->ps_buffer, PMCLOG_BUFFER_SIZE); if (nread <= 0) { if (nread == 0) ev->pl_state = PMCLOG_EOF; else if (errno != EAGAIN) /* not restartable */ ev->pl_state = PMCLOG_ERROR; return -1; } ps->ps_len = nread; ps->ps_data = ps->ps_buffer; } else { return -1; } } assert(ps->ps_len > 0); /* Retrieve one event from the byte stream. */ retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev); /* * If we need more data and we have a configured fd, try read * from it. */ if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA && ps->ps_fd != -1) { assert(ps->ps_len == 0); goto refill; } return retval; } /* * Feed data to a memory based parser. * * The memory area pointed to by 'data' needs to be valid till the * next error return from pmclog_next_event(). */ int pmclog_feed(void *cookie, char *data, int len) { struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (len < 0 || /* invalid length */ ps->ps_buffer || /* called for a file parser */ ps->ps_len != 0) /* unnecessary call */ return -1; ps->ps_data = data; ps->ps_len = len; return 0; } /* * Allocate and initialize parser state. */ void * pmclog_open(int fd) { struct pmclog_parse_state *ps; if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL) return NULL; ps->ps_state = PL_STATE_NEW_RECORD; ps->ps_arch = -1; ps->ps_initialized = 0; ps->ps_count = 0; ps->ps_offset = (off_t) 0; bzero(&ps->ps_saved, sizeof(ps->ps_saved)); ps->ps_cpuid = NULL; ps->ps_svcount = 0; ps->ps_fd = fd; ps->ps_data = NULL; ps->ps_buffer = NULL; ps->ps_len = 0; /* allocate space for a work area */ if (ps->ps_fd != PMCLOG_FD_NONE) { if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) { free(ps); return NULL; } } return ps; } /* * Free up parser state. */ void pmclog_close(void *cookie) { struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (ps->ps_buffer) free(ps->ps_buffer); free(ps); } diff --git a/sys/arm64/include/pmc_mdep.h b/sys/arm64/include/pmc_mdep.h index 5c43a7924b01..97d0f30c9c09 100644 --- a/sys/arm64/include/pmc_mdep.h +++ b/sys/arm64/include/pmc_mdep.h @@ -1,93 +1,91 @@ /*- * Copyright (c) 2009 Rui Paulo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _MACHINE_PMC_MDEP_H_ #define _MACHINE_PMC_MDEP_H_ #define PMC_MDEP_CLASS_INDEX_ARMV8 1 #define PMC_MDEP_CLASS_INDEX_DMC620_CD2 2 #define PMC_MDEP_CLASS_INDEX_DMC620_C 3 #define PMC_MDEP_CLASS_INDEX_CMN600 4 /* * On the ARMv8 platform we support the following PMCs. * * ARMV8 ARM Cortex-A53/57/72 processors */ #include #include #include #include #include union pmc_md_op_pmcallocate { struct { uint32_t pm_md_config; - uint32_t pm_md_flags; -#define PM_MD_RAW_EVENT 0x1 }; struct pmc_md_cmn600_pmu_op_pmcallocate pm_cmn600; struct pmc_md_dmc620_pmu_op_pmcallocate pm_dmc620; uint64_t __pad[4]; }; /* Logging */ #define PMCLOG_READADDR PMCLOG_READ64 #define PMCLOG_EMITADDR PMCLOG_EMIT64 #ifdef _KERNEL union pmc_md_pmc { struct pmc_md_arm64_pmc pm_arm64; struct pmc_md_cmn600_pmc pm_cmn600; struct pmc_md_dmc620_pmc pm_dmc620; }; #define PMC_IN_KERNEL_STACK(va) kstack_contains(curthread, (va), sizeof(va)) #define PMC_IN_KERNEL(va) INKERNEL((va)) #define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS) #define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_elr) #define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_x[29]) /* * Prototypes */ struct pmc_mdep *pmc_arm64_initialize(void); void pmc_arm64_finalize(struct pmc_mdep *_md); /* Optional class for CMN-600 controler's PMU. */ int pmc_cmn600_initialize(struct pmc_mdep *md); void pmc_cmn600_finalize(struct pmc_mdep *_md); int pmc_cmn600_nclasses(void); /* Optional class for DMC-620 controler's PMU. */ int pmc_dmc620_initialize_cd2(struct pmc_mdep *md); void pmc_dmc620_finalize_cd2(struct pmc_mdep *_md); int pmc_dmc620_initialize_c(struct pmc_mdep *md); void pmc_dmc620_finalize_c(struct pmc_mdep *_md); int pmc_dmc620_nclasses(void); #endif /* _KERNEL */ #endif /* !_MACHINE_PMC_MDEP_H_ */ diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c index b15d223bc7a5..fbbaf92a1547 100644 --- a/sys/dev/hwpmc/hwpmc_amd.c +++ b/sys/dev/hwpmc/hwpmc_amd.c @@ -1,1207 +1,1210 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /* Support for the AMD K7 and later processors */ #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_DEBUG enum pmc_class amd_pmc_class; #endif #define OVERFLOW_WAIT_COUNT 50 DPCPU_DEFINE_STATIC(uint32_t, nmi_counter); /* AMD K7 & K8 PMCs */ struct amd_descr { struct pmc_descr pm_descr; /* "base class" */ uint32_t pm_evsel; /* address of EVSEL register */ uint32_t pm_perfctr; /* address of PERFCTR register */ }; static struct amd_descr amd_pmcdesc[AMD_NPMCS] = { { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_0, .pm_perfctr = AMD_PMC_PERFCTR_0 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_1, .pm_perfctr = AMD_PMC_PERFCTR_1 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_2, .pm_perfctr = AMD_PMC_PERFCTR_2 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_3, .pm_perfctr = AMD_PMC_PERFCTR_3 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_4, .pm_perfctr = AMD_PMC_PERFCTR_4 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_5, .pm_perfctr = AMD_PMC_PERFCTR_5 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_L3_0, .pm_perfctr = AMD_PMC_PERFCTR_EP_L3_0 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_L3_1, .pm_perfctr = AMD_PMC_PERFCTR_EP_L3_1 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_L3_2, .pm_perfctr = AMD_PMC_PERFCTR_EP_L3_2 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_L3_3, .pm_perfctr = AMD_PMC_PERFCTR_EP_L3_3 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_L3_4, .pm_perfctr = AMD_PMC_PERFCTR_EP_L3_4 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_L3_5, .pm_perfctr = AMD_PMC_PERFCTR_EP_L3_5 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_DF_0, .pm_perfctr = AMD_PMC_PERFCTR_EP_DF_0 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_DF_1, .pm_perfctr = AMD_PMC_PERFCTR_EP_DF_1 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_DF_2, .pm_perfctr = AMD_PMC_PERFCTR_EP_DF_2 }, { .pm_descr = { .pd_name = "", .pd_class = -1, .pd_caps = AMD_PMC_CAPS, .pd_width = 48 }, .pm_evsel = AMD_PMC_EVSEL_EP_DF_3, .pm_perfctr = AMD_PMC_PERFCTR_EP_DF_3 } }; struct amd_event_code_map { enum pmc_event pe_ev; /* enum value */ uint16_t pe_code; /* encoded event mask */ uint8_t pe_mask; /* bits allowed in unit mask */ }; const struct amd_event_code_map amd_event_codes[] = { #if defined(__i386__) /* 32 bit Athlon (K7) only */ { PMC_EV_K7_DC_ACCESSES, 0x40, 0 }, { PMC_EV_K7_DC_MISSES, 0x41, 0 }, { PMC_EV_K7_DC_REFILLS_FROM_L2, 0x42, AMD_PMC_UNITMASK_MOESI }, { PMC_EV_K7_DC_REFILLS_FROM_SYSTEM, 0x43, AMD_PMC_UNITMASK_MOESI }, { PMC_EV_K7_DC_WRITEBACKS, 0x44, AMD_PMC_UNITMASK_MOESI }, { PMC_EV_K7_L1_DTLB_MISS_AND_L2_DTLB_HITS, 0x45, 0 }, { PMC_EV_K7_L1_AND_L2_DTLB_MISSES, 0x46, 0 }, { PMC_EV_K7_MISALIGNED_REFERENCES, 0x47, 0 }, { PMC_EV_K7_IC_FETCHES, 0x80, 0 }, { PMC_EV_K7_IC_MISSES, 0x81, 0 }, { PMC_EV_K7_L1_ITLB_MISSES, 0x84, 0 }, { PMC_EV_K7_L1_L2_ITLB_MISSES, 0x85, 0 }, { PMC_EV_K7_RETIRED_INSTRUCTIONS, 0xC0, 0 }, { PMC_EV_K7_RETIRED_OPS, 0xC1, 0 }, { PMC_EV_K7_RETIRED_BRANCHES, 0xC2, 0 }, { PMC_EV_K7_RETIRED_BRANCHES_MISPREDICTED, 0xC3, 0 }, { PMC_EV_K7_RETIRED_TAKEN_BRANCHES, 0xC4, 0 }, { PMC_EV_K7_RETIRED_TAKEN_BRANCHES_MISPREDICTED, 0xC5, 0 }, { PMC_EV_K7_RETIRED_FAR_CONTROL_TRANSFERS, 0xC6, 0 }, { PMC_EV_K7_RETIRED_RESYNC_BRANCHES, 0xC7, 0 }, { PMC_EV_K7_INTERRUPTS_MASKED_CYCLES, 0xCD, 0 }, { PMC_EV_K7_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, 0xCE, 0 }, { PMC_EV_K7_HARDWARE_INTERRUPTS, 0xCF, 0 }, #endif { PMC_EV_K8_FP_DISPATCHED_FPU_OPS, 0x00, 0x3F }, { PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED, 0x01, 0x00 }, { PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS, 0x02, 0x00 }, { PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD, 0x20, 0x7F }, { PMC_EV_K8_LS_MICROARCHITECTURAL_RESYNC_BY_SELF_MODIFYING_CODE, 0x21, 0x00 }, { PMC_EV_K8_LS_MICROARCHITECTURAL_RESYNC_BY_SNOOP, 0x22, 0x00 }, { PMC_EV_K8_LS_BUFFER2_FULL, 0x23, 0x00 }, { PMC_EV_K8_LS_LOCKED_OPERATION, 0x24, 0x07 }, { PMC_EV_K8_LS_MICROARCHITECTURAL_LATE_CANCEL, 0x25, 0x00 }, { PMC_EV_K8_LS_RETIRED_CFLUSH_INSTRUCTIONS, 0x26, 0x00 }, { PMC_EV_K8_LS_RETIRED_CPUID_INSTRUCTIONS, 0x27, 0x00 }, { PMC_EV_K8_DC_ACCESS, 0x40, 0x00 }, { PMC_EV_K8_DC_MISS, 0x41, 0x00 }, { PMC_EV_K8_DC_REFILL_FROM_L2, 0x42, 0x1F }, { PMC_EV_K8_DC_REFILL_FROM_SYSTEM, 0x43, 0x1F }, { PMC_EV_K8_DC_COPYBACK, 0x44, 0x1F }, { PMC_EV_K8_DC_L1_DTLB_MISS_AND_L2_DTLB_HIT, 0x45, 0x00 }, { PMC_EV_K8_DC_L1_DTLB_MISS_AND_L2_DTLB_MISS, 0x46, 0x00 }, { PMC_EV_K8_DC_MISALIGNED_DATA_REFERENCE, 0x47, 0x00 }, { PMC_EV_K8_DC_MICROARCHITECTURAL_LATE_CANCEL, 0x48, 0x00 }, { PMC_EV_K8_DC_MICROARCHITECTURAL_EARLY_CANCEL, 0x49, 0x00 }, { PMC_EV_K8_DC_ONE_BIT_ECC_ERROR, 0x4A, 0x03 }, { PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS, 0x4B, 0x07 }, { PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS, 0x4C, 0x03 }, { PMC_EV_K8_BU_CPU_CLK_UNHALTED, 0x76, 0x00 }, { PMC_EV_K8_BU_INTERNAL_L2_REQUEST, 0x7D, 0x1F }, { PMC_EV_K8_BU_FILL_REQUEST_L2_MISS, 0x7E, 0x07 }, { PMC_EV_K8_BU_FILL_INTO_L2, 0x7F, 0x03 }, { PMC_EV_K8_IC_FETCH, 0x80, 0x00 }, { PMC_EV_K8_IC_MISS, 0x81, 0x00 }, { PMC_EV_K8_IC_REFILL_FROM_L2, 0x82, 0x00 }, { PMC_EV_K8_IC_REFILL_FROM_SYSTEM, 0x83, 0x00 }, { PMC_EV_K8_IC_L1_ITLB_MISS_AND_L2_ITLB_HIT, 0x84, 0x00 }, { PMC_EV_K8_IC_L1_ITLB_MISS_AND_L2_ITLB_MISS, 0x85, 0x00 }, { PMC_EV_K8_IC_MICROARCHITECTURAL_RESYNC_BY_SNOOP, 0x86, 0x00 }, { PMC_EV_K8_IC_INSTRUCTION_FETCH_STALL, 0x87, 0x00 }, { PMC_EV_K8_IC_RETURN_STACK_HIT, 0x88, 0x00 }, { PMC_EV_K8_IC_RETURN_STACK_OVERFLOW, 0x89, 0x00 }, { PMC_EV_K8_FR_RETIRED_X86_INSTRUCTIONS, 0xC0, 0x00 }, { PMC_EV_K8_FR_RETIRED_UOPS, 0xC1, 0x00 }, { PMC_EV_K8_FR_RETIRED_BRANCHES, 0xC2, 0x00 }, { PMC_EV_K8_FR_RETIRED_BRANCHES_MISPREDICTED, 0xC3, 0x00 }, { PMC_EV_K8_FR_RETIRED_TAKEN_BRANCHES, 0xC4, 0x00 }, { PMC_EV_K8_FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED, 0xC5, 0x00 }, { PMC_EV_K8_FR_RETIRED_FAR_CONTROL_TRANSFERS, 0xC6, 0x00 }, { PMC_EV_K8_FR_RETIRED_RESYNCS, 0xC7, 0x00 }, { PMC_EV_K8_FR_RETIRED_NEAR_RETURNS, 0xC8, 0x00 }, { PMC_EV_K8_FR_RETIRED_NEAR_RETURNS_MISPREDICTED, 0xC9, 0x00 }, { PMC_EV_K8_FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED_BY_ADDR_MISCOMPARE, 0xCA, 0x00 }, { PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS, 0xCB, 0x0F }, { PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS, 0xCC, 0x07 }, { PMC_EV_K8_FR_INTERRUPTS_MASKED_CYCLES, 0xCD, 0x00 }, { PMC_EV_K8_FR_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, 0xCE, 0x00 }, { PMC_EV_K8_FR_TAKEN_HARDWARE_INTERRUPTS, 0xCF, 0x00 }, { PMC_EV_K8_FR_DECODER_EMPTY, 0xD0, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALLS, 0xD1, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_FROM_BRANCH_ABORT_TO_RETIRE, 0xD2, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_FOR_SERIALIZATION, 0xD3, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_FOR_SEGMENT_LOAD, 0xD4, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_REORDER_BUFFER_IS_FULL, 0xD5, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_RESERVATION_STATIONS_ARE_FULL, 0xD6, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_FPU_IS_FULL, 0xD7, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_LS_IS_FULL, 0xD8, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_WAITING_FOR_ALL_TO_BE_QUIET, 0xD9, 0x00 }, { PMC_EV_K8_FR_DISPATCH_STALL_WHEN_FAR_XFER_OR_RESYNC_BRANCH_PENDING, 0xDA, 0x00 }, { PMC_EV_K8_FR_FPU_EXCEPTIONS, 0xDB, 0x0F }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR0, 0xDC, 0x00 }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR1, 0xDD, 0x00 }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR2, 0xDE, 0x00 }, { PMC_EV_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR3, 0xDF, 0x00 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT, 0xE0, 0x7 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOW, 0xE1, 0x00 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED, 0xE2, 0x00 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND, 0xE3, 0x07 }, { PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION, 0xE4, 0x0F }, { PMC_EV_K8_NB_SIZED_COMMANDS, 0xEB, 0x7F }, { PMC_EV_K8_NB_PROBE_RESULT, 0xEC, 0x0F }, { PMC_EV_K8_NB_HT_BUS0_BANDWIDTH, 0xF6, 0x0F }, { PMC_EV_K8_NB_HT_BUS1_BANDWIDTH, 0xF7, 0x0F }, { PMC_EV_K8_NB_HT_BUS2_BANDWIDTH, 0xF8, 0x0F } }; const int amd_event_codes_size = nitems(amd_event_codes); /* * Per-processor information */ struct amd_cpu { struct pmc_hw pc_amdpmcs[AMD_NPMCS]; }; static struct amd_cpu **amd_pcpu; /* * read a pmc register */ static int amd_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) { enum pmc_mode mode; const struct amd_descr *pd; pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); KASSERT(amd_pcpu[cpu], ("[amd,%d] null per-cpu, cpu %d", __LINE__, cpu)); pd = &amd_pmcdesc[ri]; mode = PMC_TO_MODE(pm); PMCDBG2(MDP,REA,1,"amd-read id=%d class=%d", ri, pd->pm_descr.pd_class); #ifdef HWPMC_DEBUG KASSERT(pd->pm_descr.pd_class == amd_pmc_class, ("[amd,%d] unknown PMC class (%d)", __LINE__, pd->pm_descr.pd_class)); #endif tmp = rdmsr(pd->pm_perfctr); /* RDMSR serializes */ PMCDBG2(MDP,REA,2,"amd-read (pre-munge) id=%d -> %jd", ri, tmp); if (PMC_IS_SAMPLING_MODE(mode)) { /* * Clamp value to 0 if the counter just overflowed, * otherwise the returned reload count would wrap to a * huge value. */ if ((tmp & (1ULL << 47)) == 0) tmp = 0; else { /* Sign extend 48 bit value to 64 bits. */ tmp = (pmc_value_t) ((int64_t)(tmp << 16) >> 16); tmp = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); } } *v = tmp; PMCDBG2(MDP,REA,2,"amd-read (post-munge) id=%d -> %jd", ri, *v); return 0; } /* * Write a PMC MSR. */ static int amd_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) { const struct amd_descr *pd; enum pmc_mode mode; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); pd = &amd_pmcdesc[ri]; mode = PMC_TO_MODE(pm); #ifdef HWPMC_DEBUG KASSERT(pd->pm_descr.pd_class == amd_pmc_class, ("[amd,%d] unknown PMC class (%d)", __LINE__, pd->pm_descr.pd_class)); #endif /* use 2's complement of the count for sampling mode PMCs */ if (PMC_IS_SAMPLING_MODE(mode)) v = AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v); PMCDBG3(MDP,WRI,1,"amd-write cpu=%d ri=%d v=%jx", cpu, ri, v); /* write the PMC value */ wrmsr(pd->pm_perfctr, v); return 0; } /* * configure hardware pmc according to the configuration recorded in * pmc 'pm'. */ static int amd_config_pmc(int cpu, int ri, struct pmc *pm) { struct pmc_hw *phw; PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; KASSERT(pm == NULL || phw->phw_pmc == NULL, ("[amd,%d] pm=%p phw->pm=%p hwpmc not unconfigured", __LINE__, pm, phw->phw_pmc)); phw->phw_pmc = pm; return 0; } /* * Retrieve a configured PMC pointer from hardware state. */ static int amd_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = amd_pcpu[cpu]->pc_amdpmcs[ri].phw_pmc; return 0; } /* * Machine dependent actions taken during the context switch in of a * thread. */ static int amd_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; PMCDBG3(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp, (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0); /* enable the RDPMC instruction if needed */ if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) load_cr4(rcr4() | CR4_PCE); return 0; } /* * Machine dependent actions taken during the context switch out of a * thread. */ static int amd_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; /* can be NULL */ PMCDBG3(MDP,SWO,1, "pc=%p pp=%p enable-msr=%d", pc, pp, pp ? (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) == 1 : 0); /* always turn off the RDPMC instruction */ load_cr4(rcr4() & ~CR4_PCE); return 0; } /* * Check if a given allocation is feasible. */ static int amd_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { int i; uint64_t allowed_unitmask, caps, config, unitmask; enum pmc_event pe; const struct pmc_descr *pd; (void) cpu; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row index %d", __LINE__, ri)); pd = &amd_pmcdesc[ri].pm_descr; /* check class match */ if (pd->pd_class != a->pm_class) return EINVAL; + if ((a->pm_flags & PMC_F_EV_PMU) == 0) + return (EINVAL); + caps = pm->pm_caps; PMCDBG2(MDP,ALL,1,"amd-allocate ri=%d caps=0x%x", ri, caps); if((ri >= 0 && ri < 6) && !(a->pm_md.pm_amd.pm_amd_sub_class == PMC_AMD_SUB_CLASS_CORE)) return EINVAL; if((ri >= 6 && ri < 12) && !(a->pm_md.pm_amd.pm_amd_sub_class == PMC_AMD_SUB_CLASS_L3_CACHE)) return EINVAL; if((ri >= 12 && ri < 16) && !(a->pm_md.pm_amd.pm_amd_sub_class == PMC_AMD_SUB_CLASS_DATA_FABRIC)) return EINVAL; if (strlen(pmc_cpuid) != 0) { pm->pm_md.pm_amd.pm_amd_evsel = a->pm_md.pm_amd.pm_amd_config; PMCDBG2(MDP,ALL,2,"amd-allocate ri=%d -> config=0x%x", ri, a->pm_md.pm_amd.pm_amd_config); return (0); } pe = a->pm_ev; /* map ev to the correct event mask code */ config = allowed_unitmask = 0; for (i = 0; i < amd_event_codes_size; i++) if (amd_event_codes[i].pe_ev == pe) { config = AMD_PMC_TO_EVENTMASK(amd_event_codes[i].pe_code); allowed_unitmask = AMD_PMC_TO_UNITMASK(amd_event_codes[i].pe_mask); break; } if (i == amd_event_codes_size) return EINVAL; unitmask = a->pm_md.pm_amd.pm_amd_config & AMD_PMC_UNITMASK; if (unitmask & ~allowed_unitmask) /* disallow reserved bits */ return EINVAL; if (unitmask && (caps & PMC_CAP_QUALIFIER)) config |= unitmask; if (caps & PMC_CAP_THRESHOLD) config |= a->pm_md.pm_amd.pm_amd_config & AMD_PMC_COUNTERMASK; /* set at least one of the 'usr' or 'os' caps */ if (caps & PMC_CAP_USER) config |= AMD_PMC_USR; if (caps & PMC_CAP_SYSTEM) config |= AMD_PMC_OS; if ((caps & (PMC_CAP_USER|PMC_CAP_SYSTEM)) == 0) config |= (AMD_PMC_USR|AMD_PMC_OS); if (caps & PMC_CAP_EDGE) config |= AMD_PMC_EDGE; if (caps & PMC_CAP_INVERT) config |= AMD_PMC_INVERT; if (caps & PMC_CAP_INTERRUPT) config |= AMD_PMC_INT; pm->pm_md.pm_amd.pm_amd_evsel = config; /* save config value */ PMCDBG2(MDP,ALL,2,"amd-allocate ri=%d -> config=0x%x", ri, config); return 0; } /* * Release machine dependent state associated with a PMC. This is a * no-op on this architecture. * */ /* ARGSUSED0 */ static int amd_release_pmc(int cpu, int ri, struct pmc *pmc) { #ifdef HWPMC_DEBUG const struct amd_descr *pd; #endif struct pmc_hw *phw __diagused; (void) pmc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; KASSERT(phw->phw_pmc == NULL, ("[amd,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc)); #ifdef HWPMC_DEBUG pd = &amd_pmcdesc[ri]; if (pd->pm_descr.pd_class == amd_pmc_class) KASSERT(AMD_PMC_IS_STOPPED(pd->pm_evsel), ("[amd,%d] PMC %d released while active", __LINE__, ri)); #endif return 0; } /* * start a PMC. */ static int amd_start_pmc(int cpu, int ri, struct pmc *pm) { uint64_t config; const struct amd_descr *pd; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); pd = &amd_pmcdesc[ri]; PMCDBG2(MDP,STA,1,"amd-start cpu=%d ri=%d", cpu, ri); KASSERT(AMD_PMC_IS_STOPPED(pd->pm_evsel), ("[amd,%d] pmc%d,cpu%d: Starting active PMC \"%s\"", __LINE__, ri, cpu, pd->pm_descr.pd_name)); /* turn on the PMC ENABLE bit */ config = pm->pm_md.pm_amd.pm_amd_evsel | AMD_PMC_ENABLE; PMCDBG1(MDP,STA,2,"amd-start config=0x%x", config); wrmsr(pd->pm_evsel, config); return 0; } /* * Stop a PMC. */ static int amd_stop_pmc(int cpu, int ri, struct pmc *pm) { const struct amd_descr *pd; uint64_t config; int i; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] illegal row-index %d", __LINE__, ri)); pd = &amd_pmcdesc[ri]; KASSERT(!AMD_PMC_IS_STOPPED(pd->pm_evsel), ("[amd,%d] PMC%d, CPU%d \"%s\" already stopped", __LINE__, ri, cpu, pd->pm_descr.pd_name)); PMCDBG1(MDP,STO,1,"amd-stop ri=%d", ri); /* turn off the PMC ENABLE bit */ config = pm->pm_md.pm_amd.pm_amd_evsel & ~AMD_PMC_ENABLE; wrmsr(pd->pm_evsel, config); /* * Due to NMI latency on newer AMD processors * NMI interrupts are ignored, which leads to * panic or messages based on kernel configuration */ /* Wait for the count to be reset */ for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { if (rdmsr(pd->pm_perfctr) & (1 << (pd->pm_descr.pd_width - 1))) break; DELAY(1); } return 0; } /* * Interrupt handler. This function needs to return '1' if the * interrupt was this CPU's PMCs or '0' otherwise. It is not allowed * to sleep or do anything a 'fast' interrupt handler is not allowed * to do. */ static int amd_intr(struct trapframe *tf) { int i, error, retval, cpu; uint64_t config, evsel, perfctr; struct pmc *pm; struct amd_cpu *pac; pmc_value_t v; uint32_t active = 0, count = 0; cpu = curcpu; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] out of range CPU %d", __LINE__, cpu)); PMCDBG3(MDP,INT,1, "cpu=%d tf=%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); retval = 0; pac = amd_pcpu[cpu]; /* * look for all PMCs that have interrupted: * - look for a running, sampling PMC which has overflowed * and which has a valid 'struct pmc' association * * If found, we call a helper to process the interrupt. * * PMCs interrupting at the same time are collapsed into * a single interrupt. Check all the valid pmcs for * overflow. */ for (i = 0; i < AMD_CORE_NPMCS; i++) { if ((pm = pac->pc_amdpmcs[i].phw_pmc) == NULL || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { continue; } /* Consider pmc with valid handle as active */ active++; if (!AMD_PMC_HAS_OVERFLOWED(i)) continue; retval = 1; /* Found an interrupting PMC. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* Stop the PMC, reload count. */ evsel = amd_pmcdesc[i].pm_evsel; perfctr = amd_pmcdesc[i].pm_perfctr; v = pm->pm_sc.pm_reloadcount; config = rdmsr(evsel); KASSERT((config & ~AMD_PMC_ENABLE) == (pm->pm_md.pm_amd.pm_amd_evsel & ~AMD_PMC_ENABLE), ("[amd,%d] config mismatch reg=0x%jx pm=0x%jx", __LINE__, (uintmax_t)config, (uintmax_t)pm->pm_md.pm_amd.pm_amd_evsel)); wrmsr(evsel, config & ~AMD_PMC_ENABLE); wrmsr(perfctr, AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v)); /* Restart the counter if logging succeeded. */ error = pmc_process_interrupt(PMC_HR, pm, tf); if (error == 0) wrmsr(evsel, config); } /* * Due to NMI latency, there can be a scenario in which * multiple pmcs gets serviced in an earlier NMI and we * do not find an overflow in the subsequent NMI. * * For such cases we keep a per-cpu count of active NMIs * and compare it with min(active pmcs, 2) to determine * if this NMI was for a pmc overflow which was serviced * in an earlier request or should be ignored. */ if (retval) { DPCPU_SET(nmi_counter, min(2, active)); } else { if ((count = DPCPU_GET(nmi_counter))) { retval = 1; DPCPU_SET(nmi_counter, --count); } } if (retval) counter_u64_add(pmc_stats.pm_intr_processed, 1); else counter_u64_add(pmc_stats.pm_intr_ignored, 1); PMCDBG1(MDP,INT,2, "retval=%d", retval); return (retval); } /* * describe a PMC */ static int amd_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { const struct amd_descr *pd; struct pmc_hw *phw; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] row-index %d out of range", __LINE__, ri)); phw = &amd_pcpu[cpu]->pc_amdpmcs[ri]; pd = &amd_pmcdesc[ri]; strlcpy(pi->pm_name, pd->pm_descr.pd_name, sizeof(pi->pm_name)); pi->pm_class = pd->pm_descr.pd_class; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return 0; } /* * i386 specific entry points */ /* * return the MSR address of the given PMC. */ static int amd_get_msr(int ri, uint32_t *msr) { KASSERT(ri >= 0 && ri < AMD_NPMCS, ("[amd,%d] ri %d out of range", __LINE__, ri)); *msr = amd_pmcdesc[ri].pm_perfctr - AMD_PMC_PERFCTR_0; return (0); } /* * processor dependent initialization. */ static int amd_pcpu_init(struct pmc_mdep *md, int cpu) { int classindex, first_ri, n; struct pmc_cpu *pc; struct amd_cpu *pac; struct pmc_hw *phw; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] insane cpu number %d", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"amd-init cpu=%d", cpu); amd_pcpu[cpu] = pac = malloc(sizeof(struct amd_cpu), M_PMC, M_WAITOK|M_ZERO); /* * Set the content of the hardware descriptors to a known * state and initialize pointers in the MI per-cpu descriptor. */ pc = pmc_pcpu[cpu]; #if defined(__amd64__) classindex = PMC_MDEP_CLASS_INDEX_K8; #elif defined(__i386__) classindex = md->pmd_cputype == PMC_CPU_AMD_K8 ? PMC_MDEP_CLASS_INDEX_K8 : PMC_MDEP_CLASS_INDEX_K7; #endif first_ri = md->pmd_classdep[classindex].pcd_ri; KASSERT(pc != NULL, ("[amd,%d] NULL per-cpu pointer", __LINE__)); for (n = 0, phw = pac->pc_amdpmcs; n < AMD_NPMCS; n++, phw++) { phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n); phw->phw_pmc = NULL; pc->pc_hwpmcs[n + first_ri] = phw; } return (0); } /* * processor dependent cleanup prior to the KLD * being unloaded */ static int amd_pcpu_fini(struct pmc_mdep *md, int cpu) { int classindex, first_ri, i; uint32_t evsel; struct pmc_cpu *pc; struct amd_cpu *pac; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[amd,%d] insane cpu number (%d)", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"amd-cleanup cpu=%d", cpu); /* * First, turn off all PMCs on this CPU. */ for (i = 0; i < 4; i++) { /* XXX this loop is now not needed */ evsel = rdmsr(AMD_PMC_EVSEL_0 + i); evsel &= ~AMD_PMC_ENABLE; wrmsr(AMD_PMC_EVSEL_0 + i, evsel); } /* * Next, free up allocated space. */ if ((pac = amd_pcpu[cpu]) == NULL) return (0); amd_pcpu[cpu] = NULL; #ifdef HWPMC_DEBUG for (i = 0; i < AMD_NPMCS; i++) { KASSERT(pac->pc_amdpmcs[i].phw_pmc == NULL, ("[amd,%d] CPU%d/PMC%d in use", __LINE__, cpu, i)); KASSERT(AMD_PMC_IS_STOPPED(AMD_PMC_EVSEL_0 + i), ("[amd,%d] CPU%d/PMC%d not stopped", __LINE__, cpu, i)); } #endif pc = pmc_pcpu[cpu]; KASSERT(pc != NULL, ("[amd,%d] NULL per-cpu state", __LINE__)); #if defined(__amd64__) classindex = PMC_MDEP_CLASS_INDEX_K8; #elif defined(__i386__) classindex = md->pmd_cputype == PMC_CPU_AMD_K8 ? PMC_MDEP_CLASS_INDEX_K8 : PMC_MDEP_CLASS_INDEX_K7; #endif first_ri = md->pmd_classdep[classindex].pcd_ri; /* * Reset pointers in the MI 'per-cpu' state. */ for (i = 0; i < AMD_NPMCS; i++) { pc->pc_hwpmcs[i + first_ri] = NULL; } free(pac, M_PMC); return (0); } /* * Initialize ourselves. */ struct pmc_mdep * pmc_amd_initialize(void) { int classindex, error, i, ncpus; struct pmc_classdep *pcd; enum pmc_cputype cputype; struct pmc_mdep *pmc_mdep; enum pmc_class class; int family, model, stepping; char *name; /* * The presence of hardware performance counters on the AMD * Athlon, Duron or later processors, is _not_ indicated by * any of the processor feature flags set by the 'CPUID' * instruction, so we only check the 'instruction family' * field returned by CPUID for instruction family >= 6. */ name = NULL; family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); stepping = CPUID_TO_STEPPING(cpu_id); if (family == 0x18) snprintf(pmc_cpuid, sizeof(pmc_cpuid), "HygonGenuine-%d-%02X-%X", family, model, stepping); else snprintf(pmc_cpuid, sizeof(pmc_cpuid), "AuthenticAMD-%d-%02X-%X", family, model, stepping); switch (cpu_id & 0xF00) { #if defined(__i386__) case 0x600: /* Athlon(tm) processor */ classindex = PMC_MDEP_CLASS_INDEX_K7; cputype = PMC_CPU_AMD_K7; class = PMC_CLASS_K7; name = "K7"; break; #endif case 0xF00: /* Athlon64/Opteron processor */ classindex = PMC_MDEP_CLASS_INDEX_K8; cputype = PMC_CPU_AMD_K8; class = PMC_CLASS_K8; name = "K8"; break; default: (void) printf("pmc: Unknown AMD CPU %x %d-%d.\n", cpu_id, (cpu_id & 0xF00) >> 8, model); return NULL; } #ifdef HWPMC_DEBUG amd_pmc_class = class; #endif /* * Allocate space for pointers to PMC HW descriptors and for * the MDEP structure used by MI code. */ amd_pcpu = malloc(sizeof(struct amd_cpu *) * pmc_cpu_max(), M_PMC, M_WAITOK|M_ZERO); /* * These processors have two classes of PMCs: the TSC and * programmable PMCs. */ pmc_mdep = pmc_mdep_alloc(2); pmc_mdep->pmd_cputype = cputype; ncpus = pmc_cpu_max(); /* Initialize the TSC. */ error = pmc_tsc_initialize(pmc_mdep, ncpus); if (error) goto error; /* Initialize AMD K7 and K8 PMC handling. */ pcd = &pmc_mdep->pmd_classdep[classindex]; pcd->pcd_caps = AMD_PMC_CAPS; pcd->pcd_class = class; pcd->pcd_num = AMD_NPMCS; pcd->pcd_ri = pmc_mdep->pmd_npmc; pcd->pcd_width = 48; /* fill in the correct pmc name and class */ for (i = 0; i < AMD_NPMCS; i++) { (void) snprintf(amd_pmcdesc[i].pm_descr.pd_name, sizeof(amd_pmcdesc[i].pm_descr.pd_name), "%s-%d", name, i); amd_pmcdesc[i].pm_descr.pd_class = class; } pcd->pcd_allocate_pmc = amd_allocate_pmc; pcd->pcd_config_pmc = amd_config_pmc; pcd->pcd_describe = amd_describe; pcd->pcd_get_config = amd_get_config; pcd->pcd_get_msr = amd_get_msr; pcd->pcd_pcpu_fini = amd_pcpu_fini; pcd->pcd_pcpu_init = amd_pcpu_init; pcd->pcd_read_pmc = amd_read_pmc; pcd->pcd_release_pmc = amd_release_pmc; pcd->pcd_start_pmc = amd_start_pmc; pcd->pcd_stop_pmc = amd_stop_pmc; pcd->pcd_write_pmc = amd_write_pmc; pmc_mdep->pmd_intr = amd_intr; pmc_mdep->pmd_switch_in = amd_switch_in; pmc_mdep->pmd_switch_out = amd_switch_out; pmc_mdep->pmd_npmc += AMD_NPMCS; PMCDBG0(MDP,INI,0,"amd-initialize"); return (pmc_mdep); error: if (error) { free(pmc_mdep, M_PMC); pmc_mdep = NULL; } return (NULL); } /* * Finalization code for AMD CPUs. */ void pmc_amd_finalize(struct pmc_mdep *md) { #if defined(INVARIANTS) int classindex, i, ncpus, pmcclass; #endif pmc_tsc_finalize(md); KASSERT(amd_pcpu != NULL, ("[amd,%d] NULL per-cpu array pointer", __LINE__)); #if defined(INVARIANTS) switch (md->pmd_cputype) { #if defined(__i386__) case PMC_CPU_AMD_K7: classindex = PMC_MDEP_CLASS_INDEX_K7; pmcclass = PMC_CLASS_K7; break; #endif default: classindex = PMC_MDEP_CLASS_INDEX_K8; pmcclass = PMC_CLASS_K8; } KASSERT(md->pmd_classdep[classindex].pcd_class == pmcclass, ("[amd,%d] pmc class mismatch", __LINE__)); ncpus = pmc_cpu_max(); for (i = 0; i < ncpus; i++) KASSERT(amd_pcpu[i] == NULL, ("[amd,%d] non-null pcpu", __LINE__)); #endif free(amd_pcpu, M_PMC); amd_pcpu = NULL; } diff --git a/sys/dev/hwpmc/hwpmc_arm64.c b/sys/dev/hwpmc/hwpmc_arm64.c index 995b7158ac20..9a5debb8016b 100644 --- a/sys/dev/hwpmc/hwpmc_arm64.c +++ b/sys/dev/hwpmc/hwpmc_arm64.c @@ -1,615 +1,615 @@ /*- * Copyright (c) 2015 Ruslan Bukin * All rights reserved. * * This software was developed by the University of Cambridge Computer * Laboratory with support from ARM Ltd. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "opt_acpi.h" static int arm64_npmcs; struct arm64_event_code_map { enum pmc_event pe_ev; uint8_t pe_code; }; /* * Per-processor information. */ struct arm64_cpu { struct pmc_hw *pc_arm64pmcs; }; static struct arm64_cpu **arm64_pcpu; /* * Interrupt Enable Set Register */ static __inline void arm64_interrupt_enable(uint32_t pmc) { uint32_t reg; reg = (1 << pmc); WRITE_SPECIALREG(pmintenset_el1, reg); isb(); } /* * Interrupt Clear Set Register */ static __inline void arm64_interrupt_disable(uint32_t pmc) { uint32_t reg; reg = (1 << pmc); WRITE_SPECIALREG(pmintenclr_el1, reg); isb(); } /* * Counter Set Enable Register */ static __inline void arm64_counter_enable(unsigned int pmc) { uint32_t reg; reg = (1 << pmc); WRITE_SPECIALREG(pmcntenset_el0, reg); isb(); } /* * Counter Clear Enable Register */ static __inline void arm64_counter_disable(unsigned int pmc) { uint32_t reg; reg = (1 << pmc); WRITE_SPECIALREG(pmcntenclr_el0, reg); isb(); } /* * Performance Monitors Control Register */ static uint32_t arm64_pmcr_read(void) { uint32_t reg; reg = READ_SPECIALREG(pmcr_el0); return (reg); } static void arm64_pmcr_write(uint32_t reg) { WRITE_SPECIALREG(pmcr_el0, reg); isb(); } /* * Performance Count Register N */ static uint32_t arm64_pmcn_read(unsigned int pmc) { KASSERT(pmc < arm64_npmcs, ("%s: illegal PMC number %d", __func__, pmc)); WRITE_SPECIALREG(pmselr_el0, pmc); isb(); return (READ_SPECIALREG(pmxevcntr_el0)); } static void arm64_pmcn_write(unsigned int pmc, uint32_t reg) { KASSERT(pmc < arm64_npmcs, ("%s: illegal PMC number %d", __func__, pmc)); WRITE_SPECIALREG(pmselr_el0, pmc); WRITE_SPECIALREG(pmxevcntr_el0, reg); isb(); } static int arm64_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { uint32_t config; enum pmc_event pe; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < arm64_npmcs, ("[arm64,%d] illegal row index %d", __LINE__, ri)); if (a->pm_class != PMC_CLASS_ARMV8) { return (EINVAL); } pe = a->pm_ev; - /* Adjust the config value if needed. */ - config = a->pm_md.pm_md_config; - if ((a->pm_md.pm_md_flags & PM_MD_RAW_EVENT) == 0) { + if ((a->pm_flags & PMC_F_EV_PMU) != 0) { + config = a->pm_md.pm_md_config; + } else { config = (uint32_t)pe - PMC_EV_ARMV8_FIRST; if (config > (PMC_EV_ARMV8_LAST - PMC_EV_ARMV8_FIRST)) return (EINVAL); } switch (a->pm_caps & (PMC_CAP_SYSTEM | PMC_CAP_USER)) { case PMC_CAP_SYSTEM: config |= PMEVTYPER_U; break; case PMC_CAP_USER: config |= PMEVTYPER_P; break; default: /* * Trace both USER and SYSTEM if none are specified * (default setting) or if both flags are specified * (user explicitly requested both qualifiers). */ break; } pm->pm_md.pm_arm64.pm_arm64_evsel = config; PMCDBG2(MDP, ALL, 2, "arm64-allocate ri=%d -> config=0x%x", ri, config); return (0); } static int arm64_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) { pmc_value_t tmp; register_t s; int reg; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < arm64_npmcs, ("[arm64,%d] illegal row index %d", __LINE__, ri)); /* * Ensure we don't get interrupted while updating the overflow count. */ s = intr_disable(); tmp = arm64_pmcn_read(ri); reg = (1 << ri); if ((READ_SPECIALREG(pmovsclr_el0) & reg) != 0) { /* Clear Overflow Flag */ WRITE_SPECIALREG(pmovsclr_el0, reg); pm->pm_pcpu_state[cpu].pps_overflowcnt++; /* Reread counter in case we raced. */ tmp = arm64_pmcn_read(ri); } tmp += 0x100000000llu * pm->pm_pcpu_state[cpu].pps_overflowcnt; intr_restore(s); PMCDBG2(MDP, REA, 2, "arm64-read id=%d -> %jd", ri, tmp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { /* * Clamp value to 0 if the counter just overflowed, * otherwise the returned reload count would wrap to a * huge value. */ if ((tmp & (1ull << 63)) == 0) tmp = 0; else tmp = ARMV8_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); } *v = tmp; return (0); } static int arm64_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < arm64_npmcs, ("[arm64,%d] illegal row-index %d", __LINE__, ri)); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = ARMV8_RELOAD_COUNT_TO_PERFCTR_VALUE(v); PMCDBG3(MDP, WRI, 1, "arm64-write cpu=%d ri=%d v=%jx", cpu, ri, v); pm->pm_pcpu_state[cpu].pps_overflowcnt = v >> 32; arm64_pmcn_write(ri, v); return (0); } static int arm64_config_pmc(int cpu, int ri, struct pmc *pm) { struct pmc_hw *phw; PMCDBG3(MDP, CFG, 1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < arm64_npmcs, ("[arm64,%d] illegal row-index %d", __LINE__, ri)); phw = &arm64_pcpu[cpu]->pc_arm64pmcs[ri]; KASSERT(pm == NULL || phw->phw_pmc == NULL, ("[arm64,%d] pm=%p phw->pm=%p hwpmc not unconfigured", __LINE__, pm, phw->phw_pmc)); phw->phw_pmc = pm; return (0); } static int arm64_start_pmc(int cpu, int ri, struct pmc *pm) { uint32_t config; config = pm->pm_md.pm_arm64.pm_arm64_evsel; /* * Configure the event selection. */ WRITE_SPECIALREG(pmselr_el0, ri); WRITE_SPECIALREG(pmxevtyper_el0, config); isb(); /* * Enable the PMC. */ arm64_interrupt_enable(ri); arm64_counter_enable(ri); return (0); } static int arm64_stop_pmc(int cpu, int ri, struct pmc *pm __unused) { /* * Disable the PMCs. */ arm64_counter_disable(ri); arm64_interrupt_disable(ri); return (0); } static int arm64_release_pmc(int cpu, int ri, struct pmc *pmc) { struct pmc_hw *phw __diagused; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < arm64_npmcs, ("[arm64,%d] illegal row-index %d", __LINE__, ri)); phw = &arm64_pcpu[cpu]->pc_arm64pmcs[ri]; KASSERT(phw->phw_pmc == NULL, ("[arm64,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc)); return (0); } static int arm64_intr(struct trapframe *tf) { int retval, ri; struct pmc *pm; int error; int reg, cpu; cpu = curcpu; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] CPU %d out of range", __LINE__, cpu)); PMCDBG3(MDP,INT,1, "cpu=%d tf=%p um=%d", cpu, (void *)tf, TRAPF_USERMODE(tf)); retval = 0; for (ri = 0; ri < arm64_npmcs; ri++) { pm = arm64_pcpu[cpu]->pc_arm64pmcs[ri].phw_pmc; if (pm == NULL) continue; /* Check if counter is overflowed */ reg = (1 << ri); if ((READ_SPECIALREG(pmovsclr_el0) & reg) == 0) continue; /* Clear Overflow Flag */ WRITE_SPECIALREG(pmovsclr_el0, reg); isb(); retval = 1; /* Found an interrupting PMC. */ pm->pm_pcpu_state[cpu].pps_overflowcnt += 1; if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; if (pm->pm_state != PMC_STATE_RUNNING) continue; error = pmc_process_interrupt(PMC_HR, pm, tf); if (error) arm64_stop_pmc(cpu, ri, pm); /* Reload sampling count */ arm64_write_pmc(cpu, ri, pm, pm->pm_sc.pm_reloadcount); } return (retval); } static int arm64_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { struct pmc_hw *phw; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d], illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < arm64_npmcs, ("[arm64,%d] row-index %d out of range", __LINE__, ri)); phw = &arm64_pcpu[cpu]->pc_arm64pmcs[ri]; snprintf(pi->pm_name, sizeof(pi->pm_name), "ARMV8-%d", ri); pi->pm_class = PMC_CLASS_ARMV8; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int arm64_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = arm64_pcpu[cpu]->pc_arm64pmcs[ri].phw_pmc; return (0); } static int arm64_pcpu_init(struct pmc_mdep *md, int cpu) { struct arm64_cpu *pac; struct pmc_hw *phw; struct pmc_cpu *pc; uint64_t pmcr; int first_ri; int i; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[arm64,%d] wrong cpu number %d", __LINE__, cpu)); PMCDBG0(MDP, INI, 1, "arm64-pcpu-init"); arm64_pcpu[cpu] = pac = malloc(sizeof(struct arm64_cpu), M_PMC, M_WAITOK | M_ZERO); pac->pc_arm64pmcs = malloc(sizeof(struct pmc_hw) * arm64_npmcs, M_PMC, M_WAITOK | M_ZERO); pc = pmc_pcpu[cpu]; first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_ARMV8].pcd_ri; KASSERT(pc != NULL, ("[arm64,%d] NULL per-cpu pointer", __LINE__)); for (i = 0, phw = pac->pc_arm64pmcs; i < arm64_npmcs; i++, phw++) { phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(i); phw->phw_pmc = NULL; pc->pc_hwpmcs[i + first_ri] = phw; } /* * Disable all counters and overflow interrupts. Upon reset they are in * an undefined state. * * Don't issue an isb here, just wait for the one in arm64_pmcr_write() * to make the writes visible. */ WRITE_SPECIALREG(pmcntenclr_el0, 0xffffffff); WRITE_SPECIALREG(pmintenclr_el1, 0xffffffff); /* Enable unit */ pmcr = arm64_pmcr_read(); pmcr |= PMCR_E; arm64_pmcr_write(pmcr); return (0); } static int arm64_pcpu_fini(struct pmc_mdep *md, int cpu) { uint32_t pmcr; PMCDBG0(MDP, INI, 1, "arm64-pcpu-fini"); pmcr = arm64_pmcr_read(); pmcr &= ~PMCR_E; arm64_pmcr_write(pmcr); free(arm64_pcpu[cpu]->pc_arm64pmcs, M_PMC); free(arm64_pcpu[cpu], M_PMC); arm64_pcpu[cpu] = NULL; return (0); } struct pmc_mdep * pmc_arm64_initialize(void) { struct pmc_mdep *pmc_mdep; struct pmc_classdep *pcd; int classes, idcode, impcode; int reg; uint64_t midr; reg = arm64_pmcr_read(); arm64_npmcs = (reg & PMCR_N_MASK) >> PMCR_N_SHIFT; impcode = (reg & PMCR_IMP_MASK) >> PMCR_IMP_SHIFT; idcode = (reg & PMCR_IDCODE_MASK) >> PMCR_IDCODE_SHIFT; PMCDBG1(MDP, INI, 1, "arm64-init npmcs=%d", arm64_npmcs); /* * Write the CPU model to kern.hwpmc.cpuid. * * We zero the variant and revision fields. * * TODO: how to handle differences between cores due to big.LITTLE? * For now, just use MIDR from CPU 0. */ midr = (uint64_t)(pcpu_find(0)->pc_midr); midr &= ~(CPU_VAR_MASK | CPU_REV_MASK); snprintf(pmc_cpuid, sizeof(pmc_cpuid), "0x%016lx", midr); /* * Allocate space for pointers to PMC HW descriptors and for * the MDEP structure used by MI code. */ arm64_pcpu = malloc(sizeof(struct arm64_cpu *) * pmc_cpu_max(), M_PMC, M_WAITOK | M_ZERO); /* One AArch64 CPU class */ classes = 1; #ifdef DEV_ACPI /* Query presence of optional classes and set max class. */ if (pmc_cmn600_nclasses() > 0) classes = MAX(classes, PMC_MDEP_CLASS_INDEX_CMN600); if (pmc_dmc620_nclasses() > 0) classes = MAX(classes, PMC_MDEP_CLASS_INDEX_DMC620_C); #endif pmc_mdep = pmc_mdep_alloc(classes); switch(impcode) { case PMCR_IMP_ARM: switch (idcode) { case PMCR_IDCODE_CORTEX_A76: case PMCR_IDCODE_NEOVERSE_N1: pmc_mdep->pmd_cputype = PMC_CPU_ARMV8_CORTEX_A76; break; case PMCR_IDCODE_CORTEX_A57: case PMCR_IDCODE_CORTEX_A72: pmc_mdep->pmd_cputype = PMC_CPU_ARMV8_CORTEX_A57; break; default: case PMCR_IDCODE_CORTEX_A53: pmc_mdep->pmd_cputype = PMC_CPU_ARMV8_CORTEX_A53; break; } break; default: pmc_mdep->pmd_cputype = PMC_CPU_ARMV8_CORTEX_A53; break; } pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_ARMV8]; pcd->pcd_caps = ARMV8_PMC_CAPS; pcd->pcd_class = PMC_CLASS_ARMV8; pcd->pcd_num = arm64_npmcs; pcd->pcd_ri = pmc_mdep->pmd_npmc; pcd->pcd_width = 32; pcd->pcd_allocate_pmc = arm64_allocate_pmc; pcd->pcd_config_pmc = arm64_config_pmc; pcd->pcd_pcpu_fini = arm64_pcpu_fini; pcd->pcd_pcpu_init = arm64_pcpu_init; pcd->pcd_describe = arm64_describe; pcd->pcd_get_config = arm64_get_config; pcd->pcd_read_pmc = arm64_read_pmc; pcd->pcd_release_pmc = arm64_release_pmc; pcd->pcd_start_pmc = arm64_start_pmc; pcd->pcd_stop_pmc = arm64_stop_pmc; pcd->pcd_write_pmc = arm64_write_pmc; pmc_mdep->pmd_intr = arm64_intr; pmc_mdep->pmd_npmc += arm64_npmcs; #ifdef DEV_ACPI if (pmc_cmn600_nclasses() > 0) pmc_cmn600_initialize(pmc_mdep); if (pmc_dmc620_nclasses() > 0) { pmc_dmc620_initialize_cd2(pmc_mdep); pmc_dmc620_initialize_c(pmc_mdep); } #endif return (pmc_mdep); } void pmc_arm64_finalize(struct pmc_mdep *md) { PMCDBG0(MDP, INI, 1, "arm64-finalize"); free(arm64_pcpu, M_PMC); } diff --git a/sys/dev/hwpmc/hwpmc_core.c b/sys/dev/hwpmc/hwpmc_core.c index 15b875e3af94..3829a03eb729 100644 --- a/sys/dev/hwpmc/hwpmc_core.c +++ b/sys/dev/hwpmc/hwpmc_core.c @@ -1,1259 +1,1265 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Intel Core PMCs. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define CORE_CPUID_REQUEST 0xA #define CORE_CPUID_REQUEST_SIZE 0x4 #define CORE_CPUID_EAX 0x0 #define CORE_CPUID_EBX 0x1 #define CORE_CPUID_ECX 0x2 #define CORE_CPUID_EDX 0x3 #define IAF_PMC_CAPS \ (PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \ PMC_CAP_USER | PMC_CAP_SYSTEM) #define IAF_RI_TO_MSR(RI) ((RI) + (1 << 30)) #define IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \ PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \ PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE) #define EV_IS_NOTARCH 0 #define EV_IS_ARCH_SUPP 1 #define EV_IS_ARCH_NOTSUPP -1 /* * "Architectural" events defined by Intel. The values of these * symbols correspond to positions in the bitmask returned by * the CPUID.0AH instruction. */ enum core_arch_events { CORE_AE_BRANCH_INSTRUCTION_RETIRED = 5, CORE_AE_BRANCH_MISSES_RETIRED = 6, CORE_AE_INSTRUCTION_RETIRED = 1, CORE_AE_LLC_MISSES = 4, CORE_AE_LLC_REFERENCE = 3, CORE_AE_UNHALTED_REFERENCE_CYCLES = 2, CORE_AE_UNHALTED_CORE_CYCLES = 0 }; static enum pmc_cputype core_cputype; static int core_version; struct core_cpu { volatile uint32_t pc_iafctrl; /* Fixed function control. */ volatile uint64_t pc_globalctrl; /* Global control register. */ struct pmc_hw pc_corepmcs[]; }; static struct core_cpu **core_pcpu; static uint32_t core_architectural_events; static uint64_t core_pmcmask; static int core_iaf_ri; /* relative index of fixed counters */ static int core_iaf_width; static int core_iaf_npmc; static int core_iap_width; static int core_iap_npmc; static int core_iap_wroffset; static u_int pmc_alloc_refs; static bool pmc_tsx_force_abort_set; static int core_pcpu_noop(struct pmc_mdep *md, int cpu) { (void) md; (void) cpu; return (0); } static int core_pcpu_init(struct pmc_mdep *md, int cpu) { struct pmc_cpu *pc; struct core_cpu *cc; struct pmc_hw *phw; int core_ri, n, npmc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[iaf,%d] insane cpu number %d", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu); core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri; npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num; if (core_version >= 2) npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num; cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw), M_PMC, M_WAITOK | M_ZERO); core_pcpu[cpu] = cc; pc = pmc_pcpu[cpu]; KASSERT(pc != NULL && cc != NULL, ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu)); for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) { phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n + core_ri); phw->phw_pmc = NULL; pc->pc_hwpmcs[n + core_ri] = phw; } if (core_version >= 2 && vm_guest == VM_GUEST_NO) { /* Enable Freezing PMCs on PMI. */ wrmsr(MSR_DEBUGCTLMSR, rdmsr(MSR_DEBUGCTLMSR) | 0x1000); } return (0); } static int core_pcpu_fini(struct pmc_mdep *md, int cpu) { int core_ri, n, npmc; struct pmc_cpu *pc; struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] insane cpu number (%d)", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu); if ((cc = core_pcpu[cpu]) == NULL) return (0); core_pcpu[cpu] = NULL; pc = pmc_pcpu[cpu]; KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__, cpu)); npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num; core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri; for (n = 0; n < npmc; n++) wrmsr(IAP_EVSEL0 + n, 0); if (core_version >= 2) { wrmsr(IAF_CTRL, 0); npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num; } for (n = 0; n < npmc; n++) pc->pc_hwpmcs[n + core_ri] = NULL; free(cc, M_PMC); return (0); } /* * Fixed function counters. */ static pmc_value_t iaf_perfctr_value_to_reload_count(pmc_value_t v) { /* If the PMC has overflowed, return a reload count of zero. */ if ((v & (1ULL << (core_iaf_width - 1))) == 0) return (0); v &= (1ULL << core_iaf_width) - 1; return (1ULL << core_iaf_width) - v; } static pmc_value_t iaf_reload_count_to_perfctr_value(pmc_value_t rlc) { return (1ULL << core_iaf_width) - rlc; } static int iaf_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { uint8_t ev, umask; uint32_t caps; uint64_t config, flags; const struct pmc_md_iap_op_pmcallocate *iap; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps); if (ri < 0 || ri > core_iaf_npmc) return (EINVAL); if (a->pm_class != PMC_CLASS_IAF) return (EINVAL); + if ((a->pm_flags & PMC_F_EV_PMU) == 0) + return (EINVAL); + iap = &a->pm_md.pm_iap; config = iap->pm_iap_config; ev = IAP_EVSEL_GET(config); umask = IAP_UMASK_GET(config); if (ev == 0x0) { if (umask != ri + 1) return (EINVAL); } else { switch (ri) { case 0: /* INST_RETIRED.ANY */ if (ev != 0xC0 || umask != 0x00) return (EINVAL); break; case 1: /* CPU_CLK_UNHALTED.THREAD */ if (ev != 0x3C || umask != 0x00) return (EINVAL); break; case 2: /* CPU_CLK_UNHALTED.REF */ if (ev != 0x3C || umask != 0x01) return (EINVAL); break; case 3: /* TOPDOWN.SLOTS */ if (ev != 0xA4 || umask != 0x01) return (EINVAL); break; default: return (EINVAL); } } pmc_alloc_refs++; if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 && !pmc_tsx_force_abort_set) { pmc_tsx_force_abort_set = true; x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL | MSR_OP_WRITE, 1, NULL); } flags = 0; if (config & IAP_OS) flags |= IAF_OS; if (config & IAP_USR) flags |= IAF_USR; if (config & IAP_ANY) flags |= IAF_ANY; if (config & IAP_INT) flags |= IAF_PMI; caps = a->pm_caps; if (caps & PMC_CAP_INTERRUPT) flags |= IAF_PMI; if (caps & PMC_CAP_SYSTEM) flags |= IAF_OS; if (caps & PMC_CAP_USER) flags |= IAF_USR; if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0) flags |= (IAF_OS | IAF_USR); pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4)); PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx", (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl); return (0); } static int iaf_config_pmc(int cpu, int ri, struct pmc *pm) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__, cpu)); core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm; return (0); } static int iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { struct pmc_hw *phw; phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri]; snprintf(pi->pm_name, sizeof(pi->pm_name), "IAF-%d", ri); pi->pm_class = PMC_CLASS_IAF; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int iaf_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc; return (0); } static int iaf_get_msr(int ri, uint32_t *msr) { KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[iaf,%d] ri %d out of range", __LINE__, ri)); *msr = IAF_RI_TO_MSR(ri); return (0); } static int iaf_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) { pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); tmp = rdpmc(IAF_RI_TO_MSR(ri)); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) *v = iaf_perfctr_value_to_reload_count(tmp); else *v = tmp & ((1ULL << core_iaf_width) - 1); PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri, IAF_RI_TO_MSR(ri), *v); return (0); } static int iaf_release_pmc(int cpu, int ri, struct pmc *pmc) { PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__)); MPASS(pmc_alloc_refs > 0); if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) { pmc_tsx_force_abort_set = false; x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL | MSR_OP_WRITE, 0, NULL); } return (0); } static int iaf_start_pmc(int cpu, int ri, struct pmc *pm) { struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri); cc = core_pcpu[cpu]; cc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl; wrmsr(IAF_CTRL, cc->pc_iafctrl); cc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET)); wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl); PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)", cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL), cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL)); return (0); } static int iaf_stop_pmc(int cpu, int ri, struct pmc *pm) { struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG2(MDP,STA,1,"iaf-stop cpu=%d ri=%d", cpu, ri); cc = core_pcpu[cpu]; cc->pc_iafctrl &= ~(IAF_MASK << (ri * 4)); wrmsr(IAF_CTRL, cc->pc_iafctrl); /* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */ PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)", cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL), cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL)); return (0); } static int iaf_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) { struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); cc = core_pcpu[cpu]; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = iaf_reload_count_to_perfctr_value(v); /* Turn off the fixed counter */ wrmsr(IAF_CTRL, cc->pc_iafctrl & ~(IAF_MASK << (ri * 4))); wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1)); /* Turn on fixed counters */ wrmsr(IAF_CTRL, cc->pc_iafctrl); PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx " "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v, (uintmax_t) rdmsr(IAF_CTRL), (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri))); return (0); } static void iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth) { struct pmc_classdep *pcd; KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__)); PMCDBG0(MDP,INI,1, "iaf-initialize"); pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF]; pcd->pcd_caps = IAF_PMC_CAPS; pcd->pcd_class = PMC_CLASS_IAF; pcd->pcd_num = npmc; pcd->pcd_ri = md->pmd_npmc; pcd->pcd_width = pmcwidth; pcd->pcd_allocate_pmc = iaf_allocate_pmc; pcd->pcd_config_pmc = iaf_config_pmc; pcd->pcd_describe = iaf_describe; pcd->pcd_get_config = iaf_get_config; pcd->pcd_get_msr = iaf_get_msr; pcd->pcd_pcpu_fini = core_pcpu_noop; pcd->pcd_pcpu_init = core_pcpu_noop; pcd->pcd_read_pmc = iaf_read_pmc; pcd->pcd_release_pmc = iaf_release_pmc; pcd->pcd_start_pmc = iaf_start_pmc; pcd->pcd_stop_pmc = iaf_stop_pmc; pcd->pcd_write_pmc = iaf_write_pmc; md->pmd_npmc += npmc; } /* * Intel programmable PMCs. */ /* Sub fields of UMASK that this event supports. */ #define IAP_M_CORE (1 << 0) /* Core specificity */ #define IAP_M_AGENT (1 << 1) /* Agent specificity */ #define IAP_M_PREFETCH (1 << 2) /* Prefetch */ #define IAP_M_MESI (1 << 3) /* MESI */ #define IAP_M_SNOOPRESPONSE (1 << 4) /* Snoop response */ #define IAP_M_SNOOPTYPE (1 << 5) /* Snoop type */ #define IAP_M_TRANSITION (1 << 6) /* Transition */ #define IAP_F_CORE (0x3 << 14) /* Core specificity */ #define IAP_F_AGENT (0x1 << 13) /* Agent specificity */ #define IAP_F_PREFETCH (0x3 << 12) /* Prefetch */ #define IAP_F_MESI (0xF << 8) /* MESI */ #define IAP_F_SNOOPRESPONSE (0xB << 8) /* Snoop response */ #define IAP_F_SNOOPTYPE (0x3 << 8) /* Snoop type */ #define IAP_F_TRANSITION (0x1 << 12) /* Transition */ #define IAP_PREFETCH_RESERVED (0x2 << 12) #define IAP_CORE_THIS (0x1 << 14) #define IAP_CORE_ALL (0x3 << 14) #define IAP_F_CMASK 0xFF000000 static pmc_value_t iap_perfctr_value_to_reload_count(pmc_value_t v) { /* If the PMC has overflowed, return a reload count of zero. */ if ((v & (1ULL << (core_iap_width - 1))) == 0) return (0); v &= (1ULL << core_iap_width) - 1; return (1ULL << core_iap_width) - v; } static pmc_value_t iap_reload_count_to_perfctr_value(pmc_value_t rlc) { return (1ULL << core_iap_width) - rlc; } static int iap_pmc_has_overflowed(int ri) { uint64_t v; /* * We treat a Core (i.e., Intel architecture v1) PMC as has * having overflowed if its MSB is zero. */ v = rdpmc(ri); return ((v & (1ULL << (core_iap_width - 1))) == 0); } static int iap_event_corei7_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* Events valid only on counter 0, 1. */ case 0x40: case 0x41: case 0x42: case 0x43: case 0x4C: case 0x4E: case 0x51: case 0x52: case 0x53: case 0x63: mask = 0x3; break; /* Any row index is ok. */ default: mask = ~0; } return (mask & (1 << ri)); } static int iap_event_westmere_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* Events valid only on counter 0. */ case 0x60: case 0xB3: mask = 0x1; break; /* Events valid only on counter 0, 1. */ case 0x4C: case 0x4E: case 0x51: case 0x52: case 0x63: mask = 0x3; break; /* Any row index is ok. */ default: mask = ~0; } return (mask & (1 << ri)); } static int iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* Events valid only on counter 0. */ case 0xB7: mask = 0x1; break; /* Events valid only on counter 1. */ case 0xC0: mask = 0x2; break; /* Events valid only on counter 2. */ case 0x48: case 0xA2: case 0xA3: mask = 0x4; break; /* Events valid only on counter 3. */ case 0xBB: case 0xCD: mask = 0x8; break; /* Any row index is ok. */ default: mask = ~0; } return (mask & (1 << ri)); } static int iap_event_core_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* * Events valid only on counter 0. */ case 0x10: case 0x14: case 0x18: case 0xB3: case 0xC1: case 0xCB: mask = (1 << 0); break; /* * Events valid only on counter 1. */ case 0x11: case 0x12: case 0x13: mask = (1 << 1); break; default: mask = ~0; /* Any row index is ok. */ } return (mask & (1 << ri)); } static int iap_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { uint8_t ev; const struct pmc_md_iap_op_pmcallocate *iap; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index value %d", __LINE__, ri)); if (a->pm_class != PMC_CLASS_IAP) return (EINVAL); + if ((a->pm_flags & PMC_F_EV_PMU) == 0) + return (EINVAL); + iap = &a->pm_md.pm_iap; ev = IAP_EVSEL_GET(iap->pm_iap_config); switch (core_cputype) { case PMC_CPU_INTEL_CORE: case PMC_CPU_INTEL_CORE2: case PMC_CPU_INTEL_CORE2EXTREME: if (iap_event_core_ok_on_counter(ev, ri) == 0) return (EINVAL); case PMC_CPU_INTEL_COREI7: case PMC_CPU_INTEL_NEHALEM_EX: if (iap_event_corei7_ok_on_counter(ev, ri) == 0) return (EINVAL); break; case PMC_CPU_INTEL_WESTMERE: case PMC_CPU_INTEL_WESTMERE_EX: if (iap_event_westmere_ok_on_counter(ev, ri) == 0) return (EINVAL); break; case PMC_CPU_INTEL_SANDYBRIDGE: case PMC_CPU_INTEL_SANDYBRIDGE_XEON: case PMC_CPU_INTEL_IVYBRIDGE: case PMC_CPU_INTEL_IVYBRIDGE_XEON: case PMC_CPU_INTEL_HASWELL: case PMC_CPU_INTEL_HASWELL_XEON: case PMC_CPU_INTEL_BROADWELL: case PMC_CPU_INTEL_BROADWELL_XEON: if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0) return (EINVAL); break; case PMC_CPU_INTEL_ATOM: case PMC_CPU_INTEL_ATOM_SILVERMONT: case PMC_CPU_INTEL_ATOM_GOLDMONT: case PMC_CPU_INTEL_ATOM_GOLDMONT_P: case PMC_CPU_INTEL_ATOM_TREMONT: case PMC_CPU_INTEL_SKYLAKE: case PMC_CPU_INTEL_SKYLAKE_XEON: case PMC_CPU_INTEL_ICELAKE: case PMC_CPU_INTEL_ICELAKE_XEON: case PMC_CPU_INTEL_ALDERLAKE: default: break; } pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config; return (0); } static int iap_config_pmc(int cpu, int ri, struct pmc *pm) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__, cpu)); core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm; return (0); } static int iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { struct pmc_hw *phw; phw = &core_pcpu[cpu]->pc_corepmcs[ri]; snprintf(pi->pm_name, sizeof(pi->pm_name), "IAP-%d", ri); pi->pm_class = PMC_CLASS_IAP; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int iap_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc; return (0); } static int iap_get_msr(int ri, uint32_t *msr) { KASSERT(ri >= 0 && ri < core_iap_npmc, ("[iap,%d] ri %d out of range", __LINE__, ri)); *msr = ri; return (0); } static int iap_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) { pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); tmp = rdpmc(ri); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) *v = iap_perfctr_value_to_reload_count(tmp); else *v = tmp & ((1ULL << core_iap_width) - 1); PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri, IAP_PMC0 + ri, *v); return (0); } static int iap_release_pmc(int cpu, int ri, struct pmc *pm) { (void) pm; PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__)); return (0); } static int iap_start_pmc(int cpu, int ri, struct pmc *pm) { uint64_t evsel; struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); cc = core_pcpu[cpu]; PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri); evsel = pm->pm_md.pm_iap.pm_iap_evsel; PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x", cpu, ri, IAP_EVSEL0 + ri, evsel); /* Event specific configuration. */ switch (IAP_EVSEL_GET(evsel)) { case 0xB7: wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp); break; case 0xBB: wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp); break; default: break; } wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN); if (core_version >= 2) { cc->pc_globalctrl |= (1ULL << ri); wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl); } return (0); } static int iap_stop_pmc(int cpu, int ri, struct pmc *pm __unused) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row index %d", __LINE__, ri)); PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri); wrmsr(IAP_EVSEL0 + ri, 0); /* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */ return (0); } static int iap_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row index %d", __LINE__, ri)); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = iap_reload_count_to_perfctr_value(v); v &= (1ULL << core_iap_width) - 1; PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri, IAP_PMC0 + ri, v); /* * Write the new value to the counter (or it's alias). The * counter will be in a stopped state when the pcd_write() * entry point is called. */ wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v); return (0); } static void iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth, int flags) { struct pmc_classdep *pcd; KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__)); PMCDBG0(MDP,INI,1, "iap-initialize"); /* Remember the set of architectural events supported. */ core_architectural_events = ~flags; pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP]; pcd->pcd_caps = IAP_PMC_CAPS; pcd->pcd_class = PMC_CLASS_IAP; pcd->pcd_num = npmc; pcd->pcd_ri = md->pmd_npmc; pcd->pcd_width = pmcwidth; pcd->pcd_allocate_pmc = iap_allocate_pmc; pcd->pcd_config_pmc = iap_config_pmc; pcd->pcd_describe = iap_describe; pcd->pcd_get_config = iap_get_config; pcd->pcd_get_msr = iap_get_msr; pcd->pcd_pcpu_fini = core_pcpu_fini; pcd->pcd_pcpu_init = core_pcpu_init; pcd->pcd_read_pmc = iap_read_pmc; pcd->pcd_release_pmc = iap_release_pmc; pcd->pcd_start_pmc = iap_start_pmc; pcd->pcd_stop_pmc = iap_stop_pmc; pcd->pcd_write_pmc = iap_write_pmc; md->pmd_npmc += npmc; } static int core_intr(struct trapframe *tf) { pmc_value_t v; struct pmc *pm; struct core_cpu *cc; int error, found_interrupt, ri; PMCDBG3(MDP,INT, 1, "cpu=%d tf=%p um=%d", curcpu, (void *) tf, TRAPF_USERMODE(tf)); found_interrupt = 0; cc = core_pcpu[curcpu]; for (ri = 0; ri < core_iap_npmc; ri++) { if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; if (!iap_pmc_has_overflowed(ri)) continue; found_interrupt = 1; if (pm->pm_state != PMC_STATE_RUNNING) continue; error = pmc_process_interrupt(PMC_HR, pm, tf); v = pm->pm_sc.pm_reloadcount; v = iap_reload_count_to_perfctr_value(v); /* * Stop the counter, reload it but only restart it if * the PMC is not stalled. */ wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel); wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v); if (__predict_false(error)) continue; wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel | IAP_EN); } if (found_interrupt) counter_u64_add(pmc_stats.pm_intr_processed, 1); else counter_u64_add(pmc_stats.pm_intr_ignored, 1); if (found_interrupt) lapic_reenable_pmc(); return (found_interrupt); } static int core2_intr(struct trapframe *tf) { int error, found_interrupt = 0, n, cpu; uint64_t flag, intrstatus, intrdisable = 0; struct pmc *pm; struct core_cpu *cc; pmc_value_t v; cpu = curcpu; PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); /* * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which * PMCs have a pending PMI interrupt. We take a 'snapshot' of * the current set of interrupting PMCs and process these * after stopping them. */ intrstatus = rdmsr(IA_GLOBAL_STATUS); PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu, (uintmax_t) intrstatus); /* * Stop PMCs unless hardware already done it. */ if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0) wrmsr(IA_GLOBAL_CTRL, 0); cc = core_pcpu[cpu]; KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__)); /* * Look for interrupts from fixed function PMCs. */ for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc; n++, flag <<= 1) { if ((intrstatus & flag) == 0) continue; found_interrupt = 1; pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc; if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; error = pmc_process_interrupt(PMC_HR, pm, tf); if (__predict_false(error)) intrdisable |= flag; v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount); /* Reload sampling count. */ wrmsr(IAF_CTR0 + n, v); PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu, error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n))); } /* * Process interrupts from the programmable counters. */ for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) { if ((intrstatus & flag) == 0) continue; found_interrupt = 1; pm = cc->pc_corepmcs[n].phw_pmc; if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; error = pmc_process_interrupt(PMC_HR, pm, tf); if (__predict_false(error)) intrdisable |= flag; v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount); PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error, (uintmax_t) v); /* Reload sampling count. */ wrmsr(core_iap_wroffset + IAP_PMC0 + n, v); } if (found_interrupt) counter_u64_add(pmc_stats.pm_intr_processed, 1); else counter_u64_add(pmc_stats.pm_intr_ignored, 1); if (found_interrupt) lapic_reenable_pmc(); /* * Reenable all non-stalled PMCs. */ if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0) { wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus); cc->pc_globalctrl &= ~intrdisable; wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl); } else { if (__predict_false(intrdisable)) { cc->pc_globalctrl &= ~intrdisable; wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl); } wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus); } PMCDBG4(MDP, INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx", cpu, (uintmax_t) rdmsr(IAF_CTRL), (uintmax_t) rdmsr(IA_GLOBAL_CTRL), (uintmax_t) rdmsr(IA_GLOBAL_STATUS)); return (found_interrupt); } int pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override) { int cpuid[CORE_CPUID_REQUEST_SIZE]; int flags, nflags; do_cpuid(CORE_CPUID_REQUEST, cpuid); core_cputype = md->pmd_cputype; core_version = (version_override > 0) ? version_override : cpuid[CORE_CPUID_EAX] & 0xFF; PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d version=%d", core_cputype, maxcpu, core_version); if (core_version < 1 || core_version > 5 || (core_cputype != PMC_CPU_INTEL_CORE && core_version == 1)) { /* Unknown PMC architecture. */ printf("hwpmc_core: unknown PMC architecture: %d\n", core_version); return (EPROGMISMATCH); } core_iap_wroffset = 0; if (cpu_feature2 & CPUID2_PDCM) { if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) { PMCDBG0(MDP, INI, 1, "core-init full-width write supported"); core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0; } else PMCDBG0(MDP, INI, 1, "core-init full-width write NOT supported"); } else PMCDBG0(MDP, INI, 1, "core-init pdcm not supported"); core_pmcmask = 0; /* * Initialize programmable counters. */ core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF; core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF; core_pmcmask |= ((1ULL << core_iap_npmc) - 1); nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF; flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1); iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags); /* * Initialize fixed function counters, if present. */ if (core_version >= 2) { core_iaf_ri = core_iap_npmc; core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F; core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF; iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width); core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET; } PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask, core_iaf_ri); core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC, M_ZERO | M_WAITOK); /* * Choose the appropriate interrupt handler. */ if (core_version >= 2) md->pmd_intr = core2_intr; else md->pmd_intr = core_intr; return (0); } void pmc_core_finalize(struct pmc_mdep *md) { PMCDBG0(MDP,INI,1, "core-finalize"); free(core_pcpu, M_PMC); core_pcpu = NULL; } diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index de012b74d558..5dd8bc67d60a 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1,5878 +1,5879 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * Copyright (c) 2018 Matthew Macy * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needs to be after */ #include #include #include #include #include #include #include #include "hwpmc_soft.h" #define PMC_EPOCH_ENTER() \ struct epoch_tracker pmc_et; \ epoch_enter_preempt(global_epoch_preempt, &pmc_et) #define PMC_EPOCH_EXIT() \ epoch_exit_preempt(global_epoch_preempt, &pmc_et) /* * Types */ enum pmc_flags { PMC_FLAG_NONE = 0x00, /* do nothing */ PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ PMC_FLAG_NOWAIT = 0x04, /* do not wait for mallocs */ }; /* * The offset in sysent where the syscall is allocated. */ static int pmc_syscall_num = NO_SYSCALL; struct pmc_cpu **pmc_pcpu; /* per-cpu state */ pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ #define PMC_PCPU_SAVED(C, R) pmc_pcpu_saved[(R) + md->pmd_npmc * (C)] struct mtx_pool *pmc_mtxpool; static int *pmc_pmcdisp; /* PMC row dispositions */ #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) #define PMC_MARK_ROW_FREE(R) do { \ pmc_pmcdisp[(R)] = 0; \ } while (0) #define PMC_MARK_ROW_STANDALONE(R) do { \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ ("[pmc,%d] row disposition error", __LINE__)); \ } while (0) #define PMC_UNMARK_ROW_STANDALONE(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) #define PMC_MARK_ROW_THREAD(R) do { \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ } while (0) #define PMC_UNMARK_ROW_THREAD(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) /* various event handlers */ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, pmc_kld_unload_tag; /* Module statistics */ struct pmc_driverstats pmc_stats; /* Machine/processor dependent operations */ static struct pmc_mdep *md; /* * Hash tables mapping owner processes and target threads to PMCs. */ struct mtx pmc_processhash_mtx; /* spin mutex */ static u_long pmc_processhashmask; static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; /* * Hash table of PMC owner descriptors. This table is protected by * the shared PMC "sx" lock. */ static u_long pmc_ownerhashmask; static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; /* * List of PMC owners with system-wide sampling PMCs. */ static CK_LIST_HEAD(, pmc_owner) pmc_ss_owners; /* * List of free thread entries. This is protected by the spin * mutex. */ static struct mtx pmc_threadfreelist_mtx; /* spin mutex */ static LIST_HEAD(, pmc_thread) pmc_threadfreelist; static int pmc_threadfreelist_entries = 0; #define THREADENTRY_SIZE (sizeof(struct pmc_thread) + \ (md->pmd_npmc * sizeof(struct pmc_threadpmcstate))) /* * Task to free thread descriptors */ static struct task free_task; /* * A map of row indices to classdep structures. */ static struct pmc_classdep **pmc_rowindex_to_classdep; /* * Prototypes */ #ifdef HWPMC_DEBUG static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); static int pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf); static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp); static int pmc_attach_process(struct proc *p, struct pmc *pm); static struct pmc *pmc_allocate_pmc_descriptor(void); static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static bool pmc_can_allocate_row(int ri, enum pmc_mode mode); static bool pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags); static void pmc_destroy_owner_descriptor(struct pmc_owner *po); static void pmc_destroy_pmc_descriptor(struct pmc *pm); static void pmc_destroy_process_descriptor(struct pmc_process *pp); static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmc); static struct pmc_process *pmc_find_process_descriptor(struct proc *p, uint32_t mode); static struct pmc_thread *pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td, uint32_t mode); static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); static void pmc_log_all_process_mappings(struct pmc_owner *po); static void pmc_log_kernel_mappings(struct pmc *pm); static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_post_callchain_callback(void); static void pmc_process_allproc(struct pmc *pm); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); static void pmc_process_exec(struct thread *td, struct pmckern_procexec *pk); static void pmc_process_exit(void *arg, struct proc *p); static void pmc_process_fork(void *arg, struct proc *p1, struct proc *p2, int n); static void pmc_process_proccreate(struct proc *p); static void pmc_process_samples(int cpu, ring_type_t soft); static void pmc_process_threadcreate(struct thread *td); static void pmc_process_threadexit(struct thread *td); static void pmc_process_thread_add(struct thread *td); static void pmc_process_thread_delete(struct thread *td); static void pmc_process_thread_userret(struct thread *td); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_remove_owner(struct pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static int pmc_start(struct pmc *pm); static int pmc_stop(struct pmc *pm); static int pmc_syscall_handler(struct thread *td, void *syscall_args); static struct pmc_thread *pmc_thread_descriptor_pool_alloc(void); static void pmc_thread_descriptor_pool_drain(void); static void pmc_thread_descriptor_pool_free(struct pmc_thread *pt); static void pmc_unlink_target_process(struct pmc *pmc, struct pmc_process *pp); static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); /* * Kernel tunables and sysctl(8) interface. */ SYSCTL_DECL(_kern_hwpmc); SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "HWPMC stats"); /* Stats. */ SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW, &pmc_stats.pm_intr_ignored, "# of interrupts ignored"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW, &pmc_stats.pm_intr_processed, "# of interrupts processed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW, &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW, &pmc_stats.pm_syscalls, "# of syscalls"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW, &pmc_stats.pm_syscall_errors, "# of syscall_errors"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW, &pmc_stats.pm_buffer_requests, "# of buffer requests"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW, &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW, &pmc_stats.pm_log_sweeps, "# of times samples were processed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW, &pmc_stats.pm_merges, "# of times kernel stack was found for user trace"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW, &pmc_stats.pm_overwrites, "# of times a sample was overwritten before being logged"); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, &pmc_callchaindepth, 0, "depth of call chain records"); char pmc_cpuid[PMC_CPUID_LEN]; SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD, pmc_cpuid, 0, "cpu version string"); #ifdef HWPMC_DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr)); SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); #endif /* * kern.hwpmc.hashsize -- determines the number of rows in the * of the hash table used to look up threads */ static int pmc_hashsize = PMC_HASH_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN, &pmc_hashsize, 0, "rows in hash tables"); /* * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN, &pmc_nsamples, 0, "number of PC samples per CPU"); static uint64_t pmc_sample_mask = PMC_NSAMPLES - 1; /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. */ static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN, &pmc_mtxpool_size, 0, "size of spin mutex pool"); /* * kern.hwpmc.threadfreelist_entries -- number of free entries */ SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_entries, CTLFLAG_RD, &pmc_threadfreelist_entries, 0, "number of available thread entries"); /* * kern.hwpmc.threadfreelist_max -- maximum number of free entries */ static int pmc_threadfreelist_max = PMC_THREADLIST_MAX; SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_max, CTLFLAG_RW, &pmc_threadfreelist_max, 0, "maximum number of available thread entries before freeing some"); /* * kern.hwpmc.mincount -- minimum sample count */ static u_int pmc_mincount = 1000; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mincount, CTLFLAG_RWTUN, &pmc_mincount, 0, "minimum count for sampling counters"); /* * security.bsd.unprivileged_syspmcs -- allow non-root processes to * allocate system-wide PMCs. * * Allowing unprivileged processes to allocate system PMCs is convenient * if system-wide measurements need to be taken concurrently with other * per-process measurements. This feature is turned off by default. */ static int pmc_unprivileged_syspmcs = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN, &pmc_unprivileged_syspmcs, 0, "allow unprivileged process to allocate system PMCs"); /* * Hash function. Discard the lower 2 bits of the pointer since * these are always zero for our uses. The hash multiplier is * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). */ #if LONG_BIT == 64 #define _PMC_HM 11400714819323198486u #elif LONG_BIT == 32 #define _PMC_HM 2654435769u #else #error Must know the size of 'long' to compile #endif #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) /* * Syscall structures */ /* The `sysent' for the new syscall */ static struct sysent pmc_sysent = { .sy_narg = 2, .sy_call = pmc_syscall_handler, }; static struct syscall_module_data pmc_syscall_mod = { .chainevh = load, .chainarg = NULL, .offset = &pmc_syscall_num, .new_sysent = &pmc_sysent, .old_sysent = { .sy_narg = 0, .sy_call = NULL }, .flags = SY_THR_STATIC_KLD, }; static moduledata_t pmc_mod = { .name = PMC_MODULE_NAME, .evhand = syscall_module_handler, .priv = &pmc_syscall_mod, }; #ifdef EARLY_AP_STARTUP DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SYSCALLS, SI_ORDER_ANY); #else DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); #endif MODULE_VERSION(pmc, PMC_VERSION); #ifdef HWPMC_DEBUG enum pmc_dbgparse_state { PMCDS_WS, /* in whitespace */ PMCDS_MAJOR, /* seen a major keyword */ PMCDS_MINOR }; static int pmc_debugflags_parse(char *newstr, char *fence) { struct pmc_debugflags *tmpflags; size_t kwlen; char c, *p, *q; int error, *newbits, tmp; int found; tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK | M_ZERO); error = 0; for (p = newstr; p < fence && (c = *p); p++) { /* skip white space */ if (c == ' ' || c == '\t') continue; /* look for a keyword followed by "=" */ for (q = p; p < fence && (c = *p) && c != '='; p++) ; if (c != '=') { error = EINVAL; goto done; } kwlen = p - q; newbits = NULL; /* lookup flag group name */ #define DBG_SET_FLAG_MAJ(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ newbits = &tmpflags->pdb_ ## F; DBG_SET_FLAG_MAJ("cpu", CPU); DBG_SET_FLAG_MAJ("csw", CSW); DBG_SET_FLAG_MAJ("logging", LOG); DBG_SET_FLAG_MAJ("module", MOD); DBG_SET_FLAG_MAJ("md", MDP); DBG_SET_FLAG_MAJ("owner", OWN); DBG_SET_FLAG_MAJ("pmc", PMC); DBG_SET_FLAG_MAJ("process", PRC); DBG_SET_FLAG_MAJ("sampling", SAM); #undef DBG_SET_FLAG_MAJ if (newbits == NULL) { error = EINVAL; goto done; } p++; /* skip the '=' */ /* Now parse the individual flags */ tmp = 0; newflag: for (q = p; p < fence && (c = *p); p++) if (c == ' ' || c == '\t' || c == ',') break; /* p == fence or c == ws or c == "," or c == 0 */ if ((kwlen = p - q) == 0) { *newbits = tmp; continue; } found = 0; #define DBG_SET_FLAG_MIN(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) /* a '*' denotes all possible flags in the group */ if (kwlen == 1 && *q == '*') tmp = found = ~0; /* look for individual flag names */ DBG_SET_FLAG_MIN("allocaterow", ALR); DBG_SET_FLAG_MIN("allocate", ALL); DBG_SET_FLAG_MIN("attach", ATT); DBG_SET_FLAG_MIN("bind", BND); DBG_SET_FLAG_MIN("config", CFG); DBG_SET_FLAG_MIN("exec", EXC); DBG_SET_FLAG_MIN("exit", EXT); DBG_SET_FLAG_MIN("find", FND); DBG_SET_FLAG_MIN("flush", FLS); DBG_SET_FLAG_MIN("fork", FRK); DBG_SET_FLAG_MIN("getbuf", GTB); DBG_SET_FLAG_MIN("hook", PMH); DBG_SET_FLAG_MIN("init", INI); DBG_SET_FLAG_MIN("intr", INT); DBG_SET_FLAG_MIN("linktarget", TLK); DBG_SET_FLAG_MIN("mayberemove", OMR); DBG_SET_FLAG_MIN("ops", OPS); DBG_SET_FLAG_MIN("read", REA); DBG_SET_FLAG_MIN("register", REG); DBG_SET_FLAG_MIN("release", REL); DBG_SET_FLAG_MIN("remove", ORM); DBG_SET_FLAG_MIN("sample", SAM); DBG_SET_FLAG_MIN("scheduleio", SIO); DBG_SET_FLAG_MIN("select", SEL); DBG_SET_FLAG_MIN("signal", SIG); DBG_SET_FLAG_MIN("swi", SWI); DBG_SET_FLAG_MIN("swo", SWO); DBG_SET_FLAG_MIN("start", STA); DBG_SET_FLAG_MIN("stop", STO); DBG_SET_FLAG_MIN("syscall", PMS); DBG_SET_FLAG_MIN("unlinktarget", TUL); DBG_SET_FLAG_MIN("write", WRI); #undef DBG_SET_FLAG_MIN if (found == 0) { /* unrecognized flag name */ error = EINVAL; goto done; } if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ *newbits = tmp; continue; } p++; goto newflag; } /* save the new flag set */ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); done: free(tmpflags, M_PMC); return (error); } static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) { char *fence, *newstr; int error; u_int n; n = sizeof(pmc_debugstr); newstr = malloc(n, M_PMC, M_WAITOK | M_ZERO); strlcpy(newstr, pmc_debugstr, n); error = sysctl_handle_string(oidp, newstr, n, req); /* if there is a new string, parse and copy it */ if (error == 0 && req->newptr != NULL) { fence = newstr + (n < req->newlen ? n : req->newlen + 1); error = pmc_debugflags_parse(newstr, fence); if (error == 0) strlcpy(pmc_debugstr, newstr, sizeof(pmc_debugstr)); } free(newstr, M_PMC); return (error); } #endif /* * Map a row index to a classdep structure and return the adjusted row * index for the PMC class index. */ static struct pmc_classdep * pmc_ri_to_classdep(struct pmc_mdep *md __unused, int ri, int *adjri) { struct pmc_classdep *pcd; KASSERT(ri >= 0 && ri < md->pmd_npmc, ("[pmc,%d] illegal row-index %d", __LINE__, ri)); pcd = pmc_rowindex_to_classdep[ri]; KASSERT(pcd != NULL, ("[pmc,%d] ri %d null pcd", __LINE__, ri)); *adjri = ri - pcd->pcd_ri; KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); return (pcd); } /* * Concurrency Control * * The driver manages the following data structures: * * - target process descriptors, one per target process * - owner process descriptors (and attached lists), one per owner process * - lookup hash tables for owner and target processes * - PMC descriptors (and attached lists) * - per-cpu hardware state * - the 'hook' variable through which the kernel calls into * this module * - the machine hardware state (managed by the MD layer) * * These data structures are accessed from: * * - thread context-switch code * - interrupt handlers (possibly on multiple cpus) * - kernel threads on multiple cpus running on behalf of user * processes doing system calls * - this driver's private kernel threads * * = Locks and Locking strategy = * * The driver uses four locking strategies for its operation: * * - The global SX lock "pmc_sx" is used to protect internal * data structures. * * Calls into the module by syscall() start with this lock being * held in exclusive mode. Depending on the requested operation, * the lock may be downgraded to 'shared' mode to allow more * concurrent readers into the module. Calls into the module from * other parts of the kernel acquire the lock in shared mode. * * This SX lock is held in exclusive mode for any operations that * modify the linkages between the driver's internal data structures. * * The 'pmc_hook' function pointer is also protected by this lock. * It is only examined with the sx lock held in exclusive mode. The * kernel module is allowed to be unloaded only with the sx lock held * in exclusive mode. In normal syscall handling, after acquiring the * pmc_sx lock we first check that 'pmc_hook' is non-null before * proceeding. This prevents races between the thread unloading the module * and other threads seeking to use the module. * * - Lookups of target process structures and owner process structures * cannot use the global "pmc_sx" SX lock because these lookups need * to happen during context switches and in other critical sections * where sleeping is not allowed. We protect these lookup tables * with their own private spin-mutexes, "pmc_processhash_mtx" and * "pmc_ownerhash_mtx". * * - Interrupt handlers work in a lock free manner. At interrupt * time, handlers look at the PMC pointer (phw->phw_pmc) configured * when the PMC was started. If this pointer is NULL, the interrupt * is ignored after updating driver statistics. We ensure that this * pointer is set (using an atomic operation if necessary) before the * PMC hardware is started. Conversely, this pointer is unset atomically * only after the PMC hardware is stopped. * * We ensure that everything needed for the operation of an * interrupt handler is available without it needing to acquire any * locks. We also ensure that a PMC's software state is destroyed only * after the PMC is taken off hardware (on all CPUs). * * - Context-switch handling with process-private PMCs needs more * care. * * A given process may be the target of multiple PMCs. For example, * PMCATTACH and PMCDETACH may be requested by a process on one CPU * while the target process is running on another. A PMC could also * be getting released because its owner is exiting. We tackle * these situations in the following manner: * * - each target process structure 'pmc_process' has an array * of 'struct pmc *' pointers, one for each hardware PMC. * * - At context switch IN time, each "target" PMC in RUNNING state * gets started on hardware and a pointer to each PMC is copied into * the per-cpu phw array. The 'runcount' for the PMC is * incremented. * * - At context switch OUT time, all process-virtual PMCs are stopped * on hardware. The saved value is added to the PMCs value field * only if the PMC is in a non-deleted state (the PMCs state could * have changed during the current time slice). * * Note that since in-between a switch IN on a processor and a switch * OUT, the PMC could have been released on another CPU. Therefore * context switch OUT always looks at the hardware state to turn * OFF PMCs and will update a PMC's saved value only if reachable * from the target process record. * * - OP PMCRELEASE could be called on a PMC at any time (the PMC could * be attached to many processes at the time of the call and could * be active on multiple CPUs). * * We prevent further scheduling of the PMC by marking it as in * state 'DELETED'. If the runcount of the PMC is non-zero then * this PMC is currently running on a CPU somewhere. The thread * doing the PMCRELEASE operation waits by repeatedly doing a * pause() till the runcount comes to zero. * * The contents of a PMC descriptor (struct pmc) are protected using * a spin-mutex. In order to save space, we use a mutex pool. * * In terms of lock types used by witness(4), we use: * - Type "pmc-sx", used by the global SX lock. * - Type "pmc-sleep", for sleep mutexes used by logger threads. * - Type "pmc-per-proc", for protecting PMC owner descriptors. * - Type "pmc-leaf", used for all other spin mutexes. */ /* * Save the CPU binding of the current kthread. */ void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG0(CPU,BND,2, "save-cpu"); thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; pb->pb_priority = curthread->td_priority; thread_unlock(curthread); PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } /* * Restore the CPU binding of the current thread. */ void pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); thread_lock(curthread); sched_bind(curthread, pb->pb_cpu); if (!pb->pb_bound) sched_unbind(curthread); sched_prio(curthread, pb->pb_priority); thread_unlock(curthread); PMCDBG0(CPU,BND,2, "restore-cpu done"); } /* * Move execution over to the specified CPU and bind it there. */ void pmc_select_cpu(int cpu) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); /* Never move to an inactive CPU. */ KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " "CPU %d", __LINE__, cpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu); thread_lock(curthread); sched_prio(curthread, PRI_MIN); sched_bind(curthread, cpu); thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, cpu, curthread->td_oncpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); } /* * Force a context switch. * * We do this by pause'ing for 1 tick -- invoking mi_switch() is not * guaranteed to force a context switch. */ static void pmc_force_context_switch(void) { pause("pmcctx", 1); } uint64_t pmc_rdtsc(void) { #if defined(__i386__) || defined(__amd64__) if (__predict_true(amd_feature & AMDID_RDTSCP)) return (rdtscp()); else return (rdtsc()); #else return (get_cyclecount()); #endif } /* * Get the file name for an executable. This is a simple wrapper * around vn_fullpath(9). */ static void pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) { *fullpath = "unknown"; *freepath = NULL; vn_fullpath(v, fullpath, freepath); } /* * Remove a process owning PMCs. */ void pmc_remove_owner(struct pmc_owner *po) { struct pmc *pm, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po); /* Remove descriptor from the owner hash table */ LIST_REMOVE(po, po_next); /* release all owned PMC descriptors */ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { PMCDBG1(OWN,ORM,2, "pmc=%p", pm); KASSERT(pm->pm_owner == po, ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); pmc_release_pmc_descriptor(pm); /* will unlink from the list */ pmc_destroy_pmc_descriptor(pm); } KASSERT(po->po_sscount == 0, ("[pmc,%d] SS count not zero", __LINE__)); KASSERT(LIST_EMPTY(&po->po_pmcs), ("[pmc,%d] PMC list not empty", __LINE__)); /* de-configure the log file if present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_deconfigure_log(po); } /* * Remove an owner process record if all conditions are met. */ static void pmc_maybe_remove_owner(struct pmc_owner *po) { PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po); /* * Remove owner record if * - this process does not own any PMCs * - this process has not allocated a system-wide sampling buffer */ if (LIST_EMPTY(&po->po_pmcs) && ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } } /* * Add an association between a target process and a PMC. */ static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) { struct pmc_target *pt; struct pmc_thread *pt_td __diagused; int ri; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", __LINE__, pm, pp->pp_proc->p_pid)); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), ("[pmc,%d] Illegal reference count %d for process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); #ifdef HWPMC_DEBUG LIST_FOREACH(pt, &pm->pm_targets, pt_next) { if (pt->pt_process == pp) KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", __LINE__, pp, pm)); } #endif pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK | M_ZERO); pt->pt_process = pp; LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, (uintptr_t)pm); if (pm->pm_owner->po_owner == pp->pp_proc) pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; /* * Initialize the per-process values at this row index. */ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? pm->pm_sc.pm_reloadcount : 0; pp->pp_refcnt++; #ifdef INVARIANTS /* Confirm that the per-thread values at this row index are cleared. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt_td, &pp->pp_tds, pt_next) { KASSERT(pt_td->pt_pmcs[ri].pt_pmcval == (pmc_value_t) 0, ("[pmc,%d] pt_pmcval not cleared for pid=%d at " "ri=%d", __LINE__, pp->pp_proc->p_pid, ri)); } mtx_unlock_spin(pp->pp_tdslock); } #endif } /* * Removes the association between a target process and a PMC. */ static void pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct proc *p; struct pmc_target *ptgt; struct pmc_thread *pt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, ri, pm, pp->pp_pmcs[ri].pp_pmc)); pp->pp_pmcs[ri].pp_pmc = NULL; pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t)0; /* Clear the per-thread values at this row index. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt, &pp->pp_tds, pt_next) pt->pt_pmcs[ri].pt_pmcval = (pmc_value_t)0; mtx_unlock_spin(pp->pp_tdslock); } /* Remove owner-specific flags */ if (pm->pm_owner->po_owner == pp->pp_proc) { pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; } pp->pp_refcnt--; /* Remove the target process from the PMC structure */ LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) if (ptgt->pt_process == pp) break; KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); LIST_REMOVE(ptgt, pt_next); free(ptgt, M_PMC); /* if the PMC now lacks targets, send the owner a SIGIO */ if (LIST_EMPTY(&pm->pm_targets)) { p = pm->pm_owner->po_owner; PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p, SIGIO); } } /* * Check if PMC 'pm' may be attached to target process 't'. */ static int pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ int decline_attach, i; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) return 0; PROC_LOCK(o); oc = o->p_ucred; crhold(oc); PROC_UNLOCK(o); PROC_LOCK(t); tc = t->p_ucred; crhold(tc); PROC_UNLOCK(t); /* * The effective uid of the PMC owner should match at least one * of the {effective,real,saved} uids of the target process. */ decline_attach = oc->cr_uid != tc->cr_uid && oc->cr_uid != tc->cr_svuid && oc->cr_uid != tc->cr_ruid; /* * Every one of the target's group ids, must be in the owner's * group list. */ for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); /* check the read and saved gids too */ if (decline_attach == 0) decline_attach = !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); return !decline_attach; } /* * Attach a process to a PMC. */ static int pmc_attach_one_process(struct proc *p, struct pmc *pm) { int ri, error; char *fullpath, *freepath; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * Locate the process descriptor corresponding to process 'p', * allocating space as needed. * * Verify that rowindex 'pm_rowindex' is free in the process * descriptor. * * If not, allocate space for a descriptor and link the * process descriptor and PMC. */ ri = PMC_TO_ROWINDEX(pm); /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) { error = ENOMEM; goto fail; } if (pp->pp_pmcs[ri].pp_pmc == pm) {/* already present at slot [ri] */ error = EEXIST; goto fail; } if (pp->pp_pmcs[ri].pp_pmc != NULL) { error = EBUSY; goto fail; } pmc_link_target_process(pm, pp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) pm->pm_flags |= PMC_F_NEEDS_LOGFILE; pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ /* issue an attach event to a configured log file */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { if (p->p_flag & P_KPROC) { fullpath = kernelname; freepath = NULL; } else { pmc_getfilename(p->p_textvp, &fullpath, &freepath); pmclog_process_pmcattach(pm, p->p_pid, fullpath); } free(freepath, M_TEMP); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_process_mappings(pm->pm_owner, p); } return (0); fail: PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return (error); } /* * Attach a process and optionally its children */ static int pmc_attach_process(struct proc *p, struct pmc *pm) { int error; struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * If this PMC successfully allowed a GETMSR operation * in the past, disallow further ATTACHes. */ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) return (EPERM); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return (pmc_attach_one_process(p, pm)); /* * Traverse all child processes, attaching them to * this PMC. */ sx_slock(&proctree_lock); top = p; for (;;) { if ((error = pmc_attach_one_process(p, pm)) != 0) break; if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } if (error != 0) (void)pmc_detach_process(top, pm); done: sx_sunlock(&proctree_lock); return (error); } /* * Detach a process from a PMC. If there are no other PMCs tracking * this process, remove the process structure from its hash table. If * 'flags' contains PMC_FLAG_REMOVE, then free the process structure. */ static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) { int ri; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL, ("[pmc,%d] null pm pointer", __LINE__)); ri = PMC_TO_ROWINDEX(pm); PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", pm, ri, p, p->p_pid, p->p_comm, flags); if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) return (ESRCH); if (pp->pp_pmcs[ri].pp_pmc != pm) return (EINVAL); pmc_unlink_target_process(pm, pp); /* Issue a detach entry if a log file is configured */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcdetach(pm, p->p_pid); /* * If there are no PMCs targeting this process, we remove its * descriptor from the target hash table and unset the P_HWPMC * flag in the struct proc. */ KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal refcnt %d for process struct %p", __LINE__, pp->pp_refcnt, pp)); if (pp->pp_refcnt != 0) /* still a target of some PMC */ return (0); pmc_remove_process_descriptor(pp); if (flags & PMC_FLAG_REMOVE) pmc_destroy_process_descriptor(pp); PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return (0); } /* * Detach a process and optionally its descendants from a PMC. */ static int pmc_detach_process(struct proc *p, struct pmc *pm) { struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return (pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE)); /* * Traverse all children, detaching them from this PMC. We * ignore errors since we could be detaching a PMC from a * partially attached proc tree. */ sx_slock(&proctree_lock); top = p; for (;;) { (void)pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); if (!LIST_EMPTY(&p->p_children)) { p = LIST_FIRST(&p->p_children); } else { for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } } done: sx_sunlock(&proctree_lock); if (LIST_EMPTY(&pm->pm_targets)) pm->pm_flags &= ~PMC_F_ATTACH_DONE; return (0); } /* * Handle events after an exec() for a process: * - Inform log owners of the new exec() event * - Release any PMCs owned by the process before the exec() * - Detach PMCs from the target if required */ static void pmc_process_exec(struct thread *td, struct pmckern_procexec *pk) { struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; struct proc *p; char *fullpath, *freepath; u_int ri; bool is_using_hwpmcs; sx_assert(&pmc_sx, SX_XLOCKED); p = td->td_proc; pmc_getfilename(p->p_textvp, &fullpath, &freepath); PMC_EPOCH_ENTER(); /* Inform owners of SS mode PMCs of the exec event. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) { pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_baseaddr, pk->pm_dynaddr, fullpath); } } PMC_EPOCH_EXIT(); PROC_LOCK(p); is_using_hwpmcs = (p->p_flag & P_HWPMC) != 0; PROC_UNLOCK(p); if (!is_using_hwpmcs) { if (freepath != NULL) free(freepath, M_TEMP); return; } /* * PMCs are not inherited across an exec(): remove any PMCs that this * process is the owner of. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } /* * If the process being exec'ed is not the target of any PMC, we are * done. */ if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { if (freepath != NULL) free(freepath, M_TEMP); return; } /* * Log the exec event to all monitoring owners. Skip owners who have * already received the event because they had system sampling PMCs * active. */ for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; po = pm->pm_owner; if (po->po_sscount == 0 && (po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) { pmclog_process_procexec(po, pm->pm_id, p->p_pid, pk->pm_baseaddr, pk->pm_dynaddr, fullpath); } } if (freepath != NULL) free(freepath, M_TEMP); PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", p, p->p_pid, p->p_comm, pk->pm_credentialschanged); if (pk->pm_credentialschanged == 0) /* no change */ return; /* * If the newly exec()'ed process has a different credential * than before, allow it to be the target of a PMC only if * the PMC's owner has sufficient privilege. */ for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if (pmc_can_attach(pm, td->td_proc) != 0) { pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); } } } KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= md->pmd_npmc, ("[pmc,%d] Illegal ref count %u on pp %p", __LINE__, pp->pp_refcnt, pp)); /* * If this process is no longer the target of any * PMCs, we can remove the process entry and free * up space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); pmc_destroy_process_descriptor(pp); } } /* * Thread context switch IN. */ static void pmc_process_csw_in(struct thread *td) { struct pmc *pm; struct pmc_classdep *pcd; struct pmc_cpu *pc; struct pmc_hw *phw __diagused; struct pmc_process *pp; struct pmc_thread *pt; struct proc *p; pmc_value_t newvalue; int cpu; u_int adjri, ri; p = td->td_proc; pt = NULL; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) return; KASSERT(pp->pp_proc == td->td_proc, ("[pmc,%d] not my thread state", __LINE__)); critical_enter(); /* no preemption from this point */ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Target PMC in non-virtual mode (%d)", __LINE__, PMC_TO_MODE(pm))); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] Row index mismatch pmc %d != ri %d", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Only PMCs that are marked as 'RUNNING' need * be placed on hardware. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0, ("[pmc,%d] pm=%p runcount %ju", __LINE__, pm, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); /* increment PMC runcount */ counter_u64_add(pm->pm_runcount, 1); /* configure the HWPMC we are going to use. */ pcd = pmc_ri_to_classdep(md, ri, &adjri); (void)pcd->pcd_config_pmc(cpu, adjri, pm); phw = pc->pc_hwpmcs[ri]; KASSERT(phw != NULL, ("[pmc,%d] null hw pointer", __LINE__)); KASSERT(phw->phw_pmc == pm, ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, phw->phw_pmc, pm)); /* * Write out saved value and start the PMC. * * Sampling PMCs use a per-thread value, while * counting mode PMCs use a per-pmc value that is * inherited across descendants. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { if (pt == NULL) pt = pmc_find_thread_descriptor(pp, td, PMC_FLAG_NONE); KASSERT(pt != NULL, ("[pmc,%d] No thread found for td=%p", __LINE__, td)); mtx_pool_lock_spin(pmc_mtxpool, pm); /* * If we have a thread descriptor, use the per-thread * counter in the descriptor. If not, we will use * a per-process counter. * * TODO: Remove the per-process "safety net" once * we have thoroughly tested that we don't hit the * above assert. */ if (pt != NULL) { if (pt->pt_pmcs[ri].pt_pmcval > 0) newvalue = pt->pt_pmcs[ri].pt_pmcval; else newvalue = pm->pm_sc.pm_reloadcount; } else { /* * Use the saved value calculated after the most * recent time a thread using the shared counter * switched out. Reset the saved count in case * another thread from this process switches in * before any threads switch out. */ newvalue = pp->pp_pmcs[ri].pp_pmcval; pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount; } mtx_pool_unlock_spin(pmc_mtxpool, pm); KASSERT(newvalue > 0 && newvalue <= pm->pm_sc.pm_reloadcount, ("[pmc,%d] pmcval outside of expected range cpu=%d " "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__, cpu, ri, newvalue, pm->pm_sc.pm_reloadcount)); } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, PMC_TO_MODE(pm))); mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu, ri) = pm->pm_gv.pm_savedvalue; mtx_pool_unlock_spin(pmc_mtxpool, pm); } PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); (void)pcd->pcd_write_pmc(cpu, adjri, pm, newvalue); /* If a sampling mode PMC, reset stalled state. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. */ pm->pm_pcpu_state[cpu].pps_cpustate = 1; /* Start the PMC. */ (void)pcd->pcd_start_pmc(cpu, adjri, pm); } /* * Perform any other architecture/cpu dependent thread * switch-in actions. */ (void)(*md->pmd_switch_in)(pc, pp); critical_exit(); } /* * Thread context switch OUT. */ static void pmc_process_csw_out(struct thread *td) { struct pmc *pm; struct pmc_classdep *pcd; struct pmc_cpu *pc; struct pmc_process *pp; struct pmc_thread *pt = NULL; struct proc *p; pmc_value_t newvalue; int64_t tmp; enum pmc_mode mode; int cpu; u_int adjri, ri; /* * Locate our process descriptor; this may be NULL if * this process is exiting and we have already removed * the process from the target process table. * * Note that due to kernel preemption, multiple * context switches may happen while the process is * exiting. * * Note also that if the target process cannot be * found we still need to deconfigure any PMCs that * are currently running on hardware. */ p = td->td_proc; pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); critical_enter(); cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; /* * When a PMC gets unlinked from a target PMC, it will * be removed from the target's pp_pmc[] array. * * However, on a MP system, the target could have been * executing on another CPU at the time of the unlink. * So, at context switch OUT time, we need to look at * the hardware to determine if a PMC is scheduled on * it. */ for (ri = 0; ri < md->pmd_npmc; ri++) { pcd = pmc_ri_to_classdep(md, ri, &adjri); pm = NULL; (void)(*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL) /* nothing at this row index */ continue; mode = PMC_TO_MODE(pm); if (!PMC_IS_VIRTUAL_MODE(mode)) continue; /* not a process virtual PMC */ KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Change desired state, and then stop if not stalled. * This two-step dance should avoid race conditions where * an interrupt re-enables the PMC after this code has * already checked the pm_stalled flag. */ pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (pm->pm_pcpu_state[cpu].pps_stalled == 0) (void)pcd->pcd_stop_pmc(cpu, adjri, pm); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] pm=%p runcount %ju", __LINE__, pm, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); /* reduce this PMC's runcount */ counter_u64_add(pm->pm_runcount, -1); /* * If this PMC is associated with this process, * save the reading. */ if (pm->pm_state != PMC_STATE_DELETED && pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pp->pp_refcnt > 0, ("[pmc,%d] pp refcnt = %d", __LINE__, pp->pp_refcnt)); (void)pcd->pcd_read_pmc(cpu, adjri, pm, &newvalue); if (mode == PMC_MODE_TS) { PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d val=%jd (samp)", cpu, ri, newvalue); if (pt == NULL) pt = pmc_find_thread_descriptor(pp, td, PMC_FLAG_NONE); KASSERT(pt != NULL, ("[pmc,%d] No thread found for td=%p", __LINE__, td)); mtx_pool_lock_spin(pmc_mtxpool, pm); /* * If we have a thread descriptor, save the * per-thread counter in the descriptor. If not, * we will update the per-process counter. * * TODO: Remove the per-process "safety net" * once we have thoroughly tested that we * don't hit the above assert. */ if (pt != NULL) { pt->pt_pmcs[ri].pt_pmcval = newvalue; } else { /* * For sampling process-virtual PMCs, * newvalue is the number of events to * be seen until the next sampling * interrupt. We can just add the events * left from this invocation to the * counter, then adjust in case we * overflow our range. * * (Recall that we reload the counter * every time we use it.) */ pp->pp_pmcs[ri].pp_pmcval += newvalue; if (pp->pp_pmcs[ri].pp_pmcval > pm->pm_sc.pm_reloadcount) { pp->pp_pmcs[ri].pp_pmcval -= pm->pm_sc.pm_reloadcount; } } mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { tmp = newvalue - PMC_PCPU_SAVED(cpu, ri); PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)", cpu, ri, tmp); /* * For counting process-virtual PMCs, * we expect the count to be * increasing monotonically, modulo a 64 * bit wraparound. */ KASSERT(tmp >= 0, ("[pmc,%d] negative increment cpu=%d " "ri=%d newvalue=%jx saved=%jx " "incr=%jx", __LINE__, cpu, ri, newvalue, PMC_PCPU_SAVED(cpu, ri), tmp)); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); if (pm->pm_flags & PMC_F_LOG_PROCCSW) pmclog_process_proccsw(pm, pp, tmp, td); } } /* Mark hardware as free. */ (void)pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Perform any other architecture/cpu dependent thread * switch out functions. */ (void)(*md->pmd_switch_out)(pc, pp); critical_exit(); } /* * A new thread for a process. */ static void pmc_process_thread_add(struct thread *td) { struct pmc_process *pmc; pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE); if (pmc != NULL) pmc_find_thread_descriptor(pmc, td, PMC_FLAG_ALLOCATE); } /* * A thread delete for a process. */ static void pmc_process_thread_delete(struct thread *td) { struct pmc_process *pmc; pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE); if (pmc != NULL) pmc_thread_descriptor_pool_free(pmc_find_thread_descriptor(pmc, td, PMC_FLAG_REMOVE)); } /* * A userret() call for a thread. */ static void pmc_process_thread_userret(struct thread *td) { sched_pin(); pmc_capture_user_callchain(curcpu, PMC_UR, td->td_frame); sched_unpin(); } /* * A mapping change for a process. */ static void pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) { const struct pmc *pm; const struct pmc_process *pp; struct pmc_owner *po; char *fullpath, *freepath; pid_t pid; int ri; MPASS(!in_epoch(global_epoch_preempt)); freepath = fullpath = NULL; pmc_getfilename((struct vnode *)pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; PMC_EPOCH_ENTER(); /* Inform owners of all system-wide sampling PMCs. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); } if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; /* * Inform sampling PMC owners tracking this process. */ for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); } } done: if (freepath != NULL) free(freepath, M_TEMP); PMC_EPOCH_EXIT(); } /* * Log an munmap request. */ static void pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) { const struct pmc *pm; const struct pmc_process *pp; struct pmc_owner *po; pid_t pid; int ri; pid = td->td_proc->p_pid; PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } PMC_EPOCH_EXIT(); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; for (ri = 0; ri < md->pmd_npmc; ri++) { pm = pp->pp_pmcs[ri].pp_pmc; if (pm != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } } } /* * Log mapping information about the kernel. */ static void pmc_log_kernel_mappings(struct pmc *pm) { struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx)); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); po = pm->pm_owner; if ((po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) != 0) return; if (PMC_TO_MODE(pm) == PMC_MODE_SS) pmc_process_allproc(pm); /* * Log the current set of kernel modules. */ kmbase = linker_hwpmc_list_objects(); for (km = kmbase; km->pm_file != NULL; km++) { PMCDBG2(LOG,REG,1,"%s %p", (char *)km->pm_file, (void *)km->pm_address); pmclog_process_map_in(po, (pid_t)-1, km->pm_address, km->pm_file); } free(kmbase, M_LINKER); po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; } /* * Log the mappings for a single process. */ static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) { vm_map_t map; vm_map_entry_t entry; vm_object_t obj, lobj, tobj; vm_offset_t last_end; vm_offset_t start_addr; struct vnode *vp, *last_vp; struct vmspace *vm; char *fullpath, *freepath; u_int last_timestamp; last_vp = NULL; last_end = (vm_offset_t)0; fullpath = freepath = NULL; if ((vm = vmspace_acquire_ref(p)) == NULL) return; map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { if (entry == NULL) { PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " "NULL! pid=%d vm_map=%p\n", p->p_pid, map); break; } /* * We only care about executable map entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || (entry->protection & VM_PROT_EXECUTE) == 0 || entry->object.vm_object == NULL) { continue; } obj = entry->object.vm_object; VM_OBJECT_RLOCK(obj); /* * Walk the backing_object list to find the base (non-shadowed) * vm_object. */ for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } /* * At this point lobj is the base vm_object and it is locked. */ if (lobj == NULL) { PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d " "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj); VM_OBJECT_RUNLOCK(obj); continue; } vp = vm_object_vnode(lobj); if (vp == NULL) { if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * Skip contiguous regions that point to the same vnode, so we * don't emit redundant MAP-IN directives. */ if (entry->start == last_end && vp == last_vp) { last_end = entry->end; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * We don't want to keep the proc's vm_map or this vm_object * locked while we walk the pathname, since vn_fullpath() can * sleep. However, if we drop the lock, it's possible for * concurrent activity to modify the vm_map list. To protect * against this, we save the vm_map timestamp before we release * the lock, and check it after we reacquire the lock below. */ start_addr = entry->start; last_end = entry->end; last_timestamp = map->timestamp; vm_map_unlock_read(map); vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); freepath = NULL; pmc_getfilename(vp, &fullpath, &freepath); last_vp = vp; vrele(vp); vp = NULL; pmclog_process_map_in(po, p->p_pid, start_addr, fullpath); if (freepath != NULL) free(freepath, M_TEMP); vm_map_lock_read(map); /* * If our saved timestamp doesn't match, this means * that the vm_map was modified out from under us and * we can't trust our current "entry" pointer. Do a * new lookup for this entry. If there is no entry * for this address range, vm_map_lookup_entry() will * return the previous one, so we always want to go to * the next entry on the next loop iteration. * * There is an edge condition here that can occur if * there is no entry at or before this address. In * this situation, vm_map_lookup_entry returns * &map->header, which would cause our loop to abort * without processing the rest of the map. However, * in practice this will never happen for process * vm_map. This is because the executable's text * segment is the first mapping in the proc's address * space, and this mapping is never removed until the * process exits, so there will always be a non-header * entry at or before the requested address for * vm_map_lookup_entry to return. */ if (map->timestamp != last_timestamp) vm_map_lookup_entry(map, last_end - 1, &entry); } vm_map_unlock_read(map); vmspace_free(vm); return; } /* * Log mappings for all processes in the system. */ static void pmc_log_all_process_mappings(struct pmc_owner *po) { struct proc *p, *top; sx_assert(&pmc_sx, SX_XLOCKED); if ((p = pfind(1)) == NULL) panic("[pmc,%d] Cannot find init", __LINE__); PROC_UNLOCK(p); sx_slock(&proctree_lock); top = p; for (;;) { pmc_log_process_mappings(po, p); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); } #ifdef HWPMC_DEBUG const char *pmc_hooknames[] = { /* these strings correspond to PMC_FN_* in */ "", "EXEC", "CSW-IN", "CSW-OUT", "SAMPLE", "UNUSED1", "UNUSED2", "MMAP", "MUNMAP", "CALLCHAIN-NMI", "CALLCHAIN-SOFT", "SOFTSAMPLING", "THR-CREATE", "THR-EXIT", "THR-USERRET", "THR-CREATE-LOG", "THR-EXIT-LOG", "PROC-CREATE-LOG" }; #endif /* * The 'hook' invoked from the kernel proper */ static int pmc_hook_handler(struct thread *td, int function, void *arg) { int cpu; PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, pmc_hooknames[function], arg); switch (function) { case PMC_FN_PROCESS_EXEC: pmc_process_exec(td, (struct pmckern_procexec *)arg); break; case PMC_FN_CSW_IN: pmc_process_csw_in(td); break; case PMC_FN_CSW_OUT: pmc_process_csw_out(td); break; /* * Process accumulated PC samples. * * This function is expected to be called by hardclock() for * each CPU that has accumulated PC samples. * * This function is to be executed on the CPU whose samples * are being processed. */ case PMC_FN_DO_SAMPLES: /* * Clear the cpu specific bit in the CPU mask before * do the rest of the processing. If the NMI handler * gets invoked after the "atomic_clear_int()" call * below but before "pmc_process_samples()" gets * around to processing the interrupt, then we will * come back here at the next hardclock() tick (and * may find nothing to do if "pmc_process_samples()" * had already processed the interrupt). We don't * lose the interrupt sample. */ DPCPU_SET(pmc_sampled, 0); cpu = PCPU_GET(cpuid); pmc_process_samples(cpu, PMC_HR); pmc_process_samples(cpu, PMC_SR); pmc_process_samples(cpu, PMC_UR); break; case PMC_FN_MMAP: pmc_process_mmap(td, (struct pmckern_map_in *)arg); break; case PMC_FN_MUNMAP: MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx)); pmc_process_munmap(td, (struct pmckern_map_out *)arg); break; case PMC_FN_PROC_CREATE_LOG: pmc_process_proccreate((struct proc *)arg); break; case PMC_FN_USER_CALLCHAIN: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *)arg); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_USER_CALLCHAIN_SOFT: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); cpu = PCPU_GET(cpuid); pmc_capture_user_callchain(cpu, PMC_SR, (struct trapframe *) arg); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_SOFT_SAMPLING: /* * Call soft PMC sampling intr. */ pmc_soft_intr((struct pmckern_soft *)arg); break; case PMC_FN_THR_CREATE: pmc_process_thread_add(td); pmc_process_threadcreate(td); break; case PMC_FN_THR_CREATE_LOG: pmc_process_threadcreate(td); break; case PMC_FN_THR_EXIT: KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_process_thread_delete(td); pmc_process_threadexit(td); break; case PMC_FN_THR_EXIT_LOG: pmc_process_threadexit(td); break; case PMC_FN_THR_USERRET: KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_process_thread_userret(td); break; default: #ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); #endif break; } return (0); } /* * Allocate a 'struct pmc_owner' descriptor in the owner hash table. */ static struct pmc_owner * pmc_allocate_owner_descriptor(struct proc *p) { struct pmc_owner *po; struct pmc_ownerhash *poh; uint32_t hindex; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; /* Allocate space for N pointers and one descriptor struct. */ po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK | M_ZERO); po->po_owner = p; LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ TAILQ_INIT(&po->po_logbuffers); mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", p, p->p_pid, p->p_comm, po); return (po); } static void pmc_destroy_owner_descriptor(struct pmc_owner *po) { PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); mtx_destroy(&po->po_mtx); free(po, M_PMC); } /* * Allocate a thread descriptor from the free pool. * * NOTE: This *can* return NULL. */ static struct pmc_thread * pmc_thread_descriptor_pool_alloc(void) { struct pmc_thread *pt; mtx_lock_spin(&pmc_threadfreelist_mtx); if ((pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) { LIST_REMOVE(pt, pt_next); pmc_threadfreelist_entries--; } mtx_unlock_spin(&pmc_threadfreelist_mtx); return (pt); } /* * Add a thread descriptor to the free pool. We use this instead of free() * to maintain a cache of free entries. Additionally, we can safely call * this function when we cannot call free(), such as in a critical section. */ static void pmc_thread_descriptor_pool_free(struct pmc_thread *pt) { if (pt == NULL) return; memset(pt, 0, THREADENTRY_SIZE); mtx_lock_spin(&pmc_threadfreelist_mtx); LIST_INSERT_HEAD(&pmc_threadfreelist, pt, pt_next); pmc_threadfreelist_entries++; if (pmc_threadfreelist_entries > pmc_threadfreelist_max) taskqueue_enqueue(taskqueue_fast, &free_task); mtx_unlock_spin(&pmc_threadfreelist_mtx); } /* * An asynchronous task to manage the free list. */ static void pmc_thread_descriptor_pool_free_task(void *arg __unused, int pending __unused) { struct pmc_thread *pt; LIST_HEAD(, pmc_thread) tmplist; int delta; LIST_INIT(&tmplist); /* Determine what changes, if any, we need to make. */ mtx_lock_spin(&pmc_threadfreelist_mtx); delta = pmc_threadfreelist_entries - pmc_threadfreelist_max; while (delta > 0 && (pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) { delta--; pmc_threadfreelist_entries--; LIST_REMOVE(pt, pt_next); LIST_INSERT_HEAD(&tmplist, pt, pt_next); } mtx_unlock_spin(&pmc_threadfreelist_mtx); /* If there are entries to free, free them. */ while (!LIST_EMPTY(&tmplist)) { pt = LIST_FIRST(&tmplist); LIST_REMOVE(pt, pt_next); free(pt, M_PMC); } } /* * Drain the thread free pool, freeing all allocations. */ static void pmc_thread_descriptor_pool_drain(void) { struct pmc_thread *pt, *next; LIST_FOREACH_SAFE(pt, &pmc_threadfreelist, pt_next, next) { LIST_REMOVE(pt, pt_next); free(pt, M_PMC); } } /* * find the descriptor corresponding to thread 'td', adding or removing it * as specified by 'mode'. * * Note that this supports additional mode flags in addition to those * supported by pmc_find_process_descriptor(): * PMC_FLAG_NOWAIT: Causes the function to not wait for mallocs. * This makes it safe to call while holding certain other locks. */ static struct pmc_thread * pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td, uint32_t mode) { struct pmc_thread *pt = NULL, *ptnew = NULL; int wait_flag; KASSERT(td != NULL, ("[pmc,%d] called to add NULL td", __LINE__)); /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case prior to * acquiring the lock. */ if ((mode & PMC_FLAG_ALLOCATE) != 0) { if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) { wait_flag = M_WAITOK; if ((mode & PMC_FLAG_NOWAIT) != 0 || in_epoch(global_epoch_preempt)) wait_flag = M_NOWAIT; ptnew = malloc(THREADENTRY_SIZE, M_PMC, wait_flag | M_ZERO); } } mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt, &pp->pp_tds, pt_next) { if (pt->pt_td == td) break; } if ((mode & PMC_FLAG_REMOVE) != 0 && pt != NULL) LIST_REMOVE(pt, pt_next); if ((mode & PMC_FLAG_ALLOCATE) != 0 && pt == NULL && ptnew != NULL) { pt = ptnew; ptnew = NULL; pt->pt_td = td; LIST_INSERT_HEAD(&pp->pp_tds, pt, pt_next); } mtx_unlock_spin(pp->pp_tdslock); if (ptnew != NULL) { free(ptnew, M_PMC); } return (pt); } /* * Try to add thread descriptors for each thread in a process. */ static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp) { struct pmc_thread **tdlist; struct thread *curtd; int i, tdcnt, tdlistsz; KASSERT(!PROC_LOCKED(p), ("[pmc,%d] proc unexpectedly locked", __LINE__)); tdcnt = 32; restart: tdlistsz = roundup2(tdcnt, 32); tdcnt = 0; tdlist = malloc(sizeof(struct pmc_thread *) * tdlistsz, M_TEMP, M_WAITOK); PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, curtd) tdcnt++; if (tdcnt >= tdlistsz) { PROC_UNLOCK(p); free(tdlist, M_TEMP); goto restart; } /* * Try to add each thread to the list without sleeping. If unable, * add to a queue to retry after dropping the process lock. */ tdcnt = 0; FOREACH_THREAD_IN_PROC(p, curtd) { tdlist[tdcnt] = pmc_find_thread_descriptor(pp, curtd, PMC_FLAG_ALLOCATE | PMC_FLAG_NOWAIT); if (tdlist[tdcnt] == NULL) { PROC_UNLOCK(p); for (i = 0; i <= tdcnt; i++) pmc_thread_descriptor_pool_free(tdlist[i]); free(tdlist, M_TEMP); goto restart; } tdcnt++; } PROC_UNLOCK(p); free(tdlist, M_TEMP); } /* * Find the descriptor corresponding to process 'p', adding or removing it * as specified by 'mode'. */ static struct pmc_process * pmc_find_process_descriptor(struct proc *p, uint32_t mode) { struct pmc_process *pp, *ppnew; struct pmc_processhash *pph; uint32_t hindex; hindex = PMC_HASH_PTR(p, pmc_processhashmask); pph = &pmc_processhash[hindex]; ppnew = NULL; /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we * cannot call malloc(9) once we hold a spin lock. */ if ((mode & PMC_FLAG_ALLOCATE) != 0) ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * sizeof(struct pmc_targetstate), M_PMC, M_WAITOK | M_ZERO); mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) { if (pp->pp_proc == p) break; } if ((mode & PMC_FLAG_REMOVE) != 0 && pp != NULL) LIST_REMOVE(pp, pp_next); if ((mode & PMC_FLAG_ALLOCATE) != 0 && pp == NULL && ppnew != NULL) { ppnew->pp_proc = p; LIST_INIT(&ppnew->pp_tds); ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew); LIST_INSERT_HEAD(pph, ppnew, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); pp = ppnew; ppnew = NULL; /* Add thread descriptors for this process' current threads. */ pmc_add_thread_descriptors_from_proc(p, pp); } else mtx_unlock_spin(&pmc_processhash_mtx); if (ppnew != NULL) free(ppnew, M_PMC); return (pp); } /* * Remove a process descriptor from the process hash table. */ static void pmc_remove_process_descriptor(struct pmc_process *pp) { KASSERT(pp->pp_refcnt == 0, ("[pmc,%d] Removing process descriptor %p with count %d", __LINE__, pp, pp->pp_refcnt)); mtx_lock_spin(&pmc_processhash_mtx); LIST_REMOVE(pp, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); } /* * Destroy a process descriptor. */ static void pmc_destroy_process_descriptor(struct pmc_process *pp) { struct pmc_thread *pmc_td; while ((pmc_td = LIST_FIRST(&pp->pp_tds)) != NULL) { LIST_REMOVE(pmc_td, pt_next); pmc_thread_descriptor_pool_free(pmc_td); } free(pp, M_PMC); } /* * Find an owner descriptor corresponding to proc 'p'. */ static struct pmc_owner * pmc_find_owner_descriptor(struct proc *p) { struct pmc_owner *po; struct pmc_ownerhash *poh; uint32_t hindex; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; po = NULL; LIST_FOREACH(po, poh, po_next) { if (po->po_owner == p) break; } PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); return (po); } /* * Allocate a pmc descriptor and initialize its fields. */ static struct pmc * pmc_allocate_pmc_descriptor(void) { struct pmc *pmc; pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK | M_ZERO); pmc->pm_runcount = counter_u64_alloc(M_WAITOK); pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state) * mp_ncpus, M_PMC, M_WAITOK | M_ZERO); PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); return (pmc); } /* * Destroy a pmc descriptor. */ static void pmc_destroy_pmc_descriptor(struct pmc *pm) { KASSERT(pm->pm_state == PMC_STATE_DELETED || pm->pm_state == PMC_STATE_FREE, ("[pmc,%d] destroying non-deleted PMC", __LINE__)); KASSERT(LIST_EMPTY(&pm->pm_targets), ("[pmc,%d] destroying pmc with targets", __LINE__)); KASSERT(pm->pm_owner == NULL, ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); KASSERT(counter_u64_fetch(pm->pm_runcount) == 0, ("[pmc,%d] pmc has non-zero run count %ju", __LINE__, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); counter_u64_free(pm->pm_runcount); free(pm->pm_pcpu_state, M_PMC); free(pm, M_PMC); } static void pmc_wait_for_pmc_idle(struct pmc *pm) { #ifdef INVARIANTS volatile int maxloop; maxloop = 100 * pmc_cpu_max(); #endif /* * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. */ pmclog_flush(pm->pm_owner, 1); while (counter_u64_fetch(pm->pm_runcount) > 0) { pmclog_flush(pm->pm_owner, 1); #ifdef INVARIANTS maxloop--; KASSERT(maxloop > 0, ("[pmc,%d] (ri%d, rc%ju) waiting too long for " "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm), (uintmax_t)counter_u64_fetch(pm->pm_runcount))); #endif pmc_force_context_switch(); } } /* * This function does the following things: * * - detaches the PMC from hardware * - unlinks all target threads that were attached to it * - removes the PMC from its owner's list * - destroys the PMC private mutex * * Once this function completes, the given pmc pointer can be freed by * calling pmc_destroy_pmc_descriptor(). */ static void pmc_release_pmc_descriptor(struct pmc *pm) { struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_hw *phw __diagused; struct pmc_owner *po; struct pmc_process *pp; struct pmc_target *ptgt, *tmp; enum pmc_mode mode; u_int adjri, ri, cpu; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mode = PMC_TO_MODE(pm); PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, mode); /* * First, we take the PMC off hardware. */ cpu = 0; if (PMC_IS_SYSTEM_MODE(mode)) { /* * A system mode PMC runs on a specific CPU. Switch * to this CPU and turn hardware off. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); pmc_select_cpu(cpu); /* switch off non-stalled CPUs */ pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (pm->pm_state == PMC_STATE_RUNNING && pm->pm_pcpu_state[cpu].pps_stalled == 0) { phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; KASSERT(phw->phw_pmc == pm, ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", __LINE__, ri, phw->phw_pmc, pm)); PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); critical_enter(); (void)pcd->pcd_stop_pmc(cpu, adjri, pm); critical_exit(); } PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); critical_enter(); (void)pcd->pcd_config_pmc(cpu, adjri, NULL); critical_exit(); /* adjust the global and process count of SS mode PMCs */ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { po = pm->pm_owner; po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); CK_LIST_REMOVE(po, po_ssnext); epoch_wait_preempt(global_epoch_preempt); } } pm->pm_state = PMC_STATE_DELETED; pmc_restore_cpu_binding(&pb); /* * We could have references to this PMC structure in the * per-cpu sample queues. Wait for the queue to drain. */ pmc_wait_for_pmc_idle(pm); } else if (PMC_IS_VIRTUAL_MODE(mode)) { /* * A virtual PMC could be running on multiple CPUs at a given * instant. * * By marking its state as DELETED, we ensure that this PMC is * never further scheduled on hardware. * * Then we wait till all CPUs are done with this PMC. */ pm->pm_state = PMC_STATE_DELETED; /* Wait for the PMCs runcount to come to zero. */ pmc_wait_for_pmc_idle(pm); /* * At this point the PMC is off all CPUs and cannot be freshly * scheduled onto a CPU. It is now safe to unlink all targets * from this PMC. If a process-record's refcount falls to zero, * we remove it from the hash table. The module-wide SX lock * protects us from races. */ LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { pp = ptgt->pt_process; pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); /* * If the target process record shows that no PMCs are * attached to it, reclaim its space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); pmc_destroy_process_descriptor(pp); } } cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ } /* * Release any MD resources. */ (void)pcd->pcd_release_pmc(cpu, adjri, pm); /* * Update row disposition. */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) PMC_UNMARK_ROW_STANDALONE(ri); else PMC_UNMARK_ROW_THREAD(ri); /* Unlink from the owner's list. */ if (pm->pm_owner != NULL) { LIST_REMOVE(pm, pm_next); pm->pm_owner = NULL; } } /* * Register an owner and a pmc. */ static int pmc_register_owner(struct proc *p, struct pmc *pmc) { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(p)) == NULL) { if ((po = pmc_allocate_owner_descriptor(p)) == NULL) return (ENOMEM); } KASSERT(pmc->pm_owner == NULL, ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); pmc->pm_owner = po; LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) pmclog_process_pmcallocate(pmc); PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", po, pmc); return (0); } /* * Return the current row disposition: * == 0 => FREE * > 0 => PROCESS MODE * < 0 => SYSTEM MODE */ int pmc_getrowdisp(int ri) { return (pmc_pmcdisp[ri]); } /* * Check if a PMC at row index 'ri' can be allocated to the current * process. * * Allocation can fail if: * - the current process is already being profiled by a PMC at index 'ri', * attached to it via OP_PMCATTACH. * - the current process has already allocated a PMC at index 'ri' * via OP_ALLOCATE. */ static bool pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) { struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; enum pmc_mode mode; PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); /* * We shouldn't have already allocated a process-mode PMC at * row index 'ri'. * * We shouldn't have allocated a system-wide PMC on the same * CPU and same RI. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (PMC_TO_ROWINDEX(pm) == ri) { mode = PMC_TO_MODE(pm); if (PMC_IS_VIRTUAL_MODE(mode)) return (false); if (PMC_IS_SYSTEM_MODE(mode) && PMC_TO_CPU(pm) == cpu) return (false); } } } /* * We also shouldn't be the target of any PMC at this index * since otherwise a PMC_ATTACH to ourselves will fail. */ if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) if (pp->pp_pmcs[ri].pp_pmc != NULL) return (false); PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", p, p->p_pid, p->p_comm, ri); return (true); } /* * Check if a given PMC at row index 'ri' can be currently used in * mode 'mode'. */ static bool pmc_can_allocate_row(int ri, enum pmc_mode mode) { enum pmc_disp disp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); if (PMC_IS_SYSTEM_MODE(mode)) disp = PMC_DISP_STANDALONE; else disp = PMC_DISP_THREAD; /* * check disposition for PMC row 'ri': * * Expected disposition Row-disposition Result * * STANDALONE STANDALONE or FREE proceed * STANDALONE THREAD fail * THREAD THREAD or FREE proceed * THREAD STANDALONE fail */ if (!PMC_ROW_DISP_IS_FREE(ri) && !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) return (false); /* * All OK */ PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); return (true); } /* * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. */ static struct pmc * pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) { struct pmc *pm; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (pm->pm_id == pmcid) return (pm); } return (NULL); } static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) { struct pmc *pm, *opm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid); if (PMC_ID_TO_ROWINDEX(pmcid) >= md->pmd_npmc) return (EINVAL); if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) { /* * In case of PMC_F_DESCENDANTS child processes we will not find * the current process in the owners hash list. Find the owner * process first and from there lookup the po. */ pp = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE); if (pp == NULL) return (ESRCH); opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc; if (opm == NULL) return (ESRCH); if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER | PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER | PMC_F_DESCENDANTS)) return (ESRCH); po = opm->pm_owner; } if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) return (EINVAL); PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); *pmc = pm; return (0); } /* * Start a PMC. */ static int pmc_start(struct pmc *pm) { struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_owner *po; pmc_value_t v; enum pmc_mode mode; int adjri, error, cpu, ri; KASSERT(pm != NULL, ("[pmc,%d] null pm", __LINE__)); mode = PMC_TO_MODE(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); error = 0; po = pm->pm_owner; PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); po = pm->pm_owner; /* * Disallow PMCSTART if a logfile is required but has not been * configured yet. */ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) != 0 && (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EDOOFUS); /* programming error */ /* * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { /* * If a PMCATTACH has never been done on this PMC, * attach it to its owner process. */ if (LIST_EMPTY(&pm->pm_targets)) { error = (pm->pm_flags & PMC_F_ATTACH_DONE) != 0 ? ESRCH : pmc_attach_process(po->po_owner, pm); } /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ if (error == 0) { pm->pm_state = PMC_STATE_RUNNING; if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) != 0) pmc_force_context_switch(); } return (error); } /* * A system-wide PMC. * * Add the owner to the global list if this is a system-wide * sampling PMC. */ if (mode == PMC_MODE_SS) { /* * Log mapping information for all existing processes in the * system. Subsequent mappings are logged as they happen; * see pmc_process_mmap(). */ if (po->po_logprocmaps == 0) { pmc_log_all_process_mappings(po); po->po_logprocmaps = 1; } po->po_sscount++; if (po->po_sscount == 1) { atomic_add_rel_int(&pmc_ss_count, 1); CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); PMCDBG1(PMC,OPS,1, "po=%p in global list", po); } } /* * Move to the CPU associated with this * PMC, and start the hardware. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); /* * global PMCs are configured at allocation time * so write out the initial value and start the PMC. */ pm->pm_state = PMC_STATE_RUNNING; critical_enter(); v = PMC_IS_SAMPLING_MODE(mode) ? pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial; if ((error = pcd->pcd_write_pmc(cpu, adjri, pm, v)) == 0) { /* If a sampling mode PMC, reset stalled state. */ if (PMC_IS_SAMPLING_MODE(mode)) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. Start it. */ pm->pm_pcpu_state[cpu].pps_cpustate = 1; error = pcd->pcd_start_pmc(cpu, adjri, pm); } critical_exit(); pmc_restore_cpu_binding(&pb); return (error); } /* * Stop a PMC. */ static int pmc_stop(struct pmc *pm) { struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_owner *po; int adjri, cpu, error, ri; KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); pm->pm_state = PMC_STATE_STOPPED; /* * If the PMC is a virtual mode one, changing the state to non-RUNNING * is enough to ensure that the PMC never gets scheduled. * * If this PMC is current running on a CPU, then it will handled * correctly at the time its target process is context switched out. */ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) return (0); /* * A system-mode PMC. Move to the CPU associated with this PMC, and * stop the hardware. We update the 'initial count' so that a * subsequent PMCSTART will resume counting from the current hardware * count. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); pm->pm_pcpu_state[cpu].pps_cpustate = 0; critical_enter(); if ((error = pcd->pcd_stop_pmc(cpu, adjri, pm)) == 0) { error = pcd->pcd_read_pmc(cpu, adjri, pm, &pm->pm_sc.pm_initial); } critical_exit(); pmc_restore_cpu_binding(&pb); /* Remove this owner from the global list of SS PMC owners. */ po = pm->pm_owner; if (PMC_TO_MODE(pm) == PMC_MODE_SS) { po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); CK_LIST_REMOVE(po, po_ssnext); epoch_wait_preempt(global_epoch_preempt); PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); } } return (error); } static struct pmc_classdep * pmc_class_to_classdep(enum pmc_class class) { int n; for (n = 0; n < md->pmd_nclass; n++) { if (md->pmd_classdep[n].pcd_class == class) return (&md->pmd_classdep[n]); } return (NULL); } #if defined(HWPMC_DEBUG) && defined(KTR) static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , __PMC_OPS() NULL }; #endif /* * The syscall interface */ #define PMC_GET_SX_XLOCK(...) do { \ sx_xlock(&pmc_sx); \ if (pmc_hook == NULL) { \ sx_xunlock(&pmc_sx); \ return __VA_ARGS__; \ } \ } while (0) #define PMC_DOWNGRADE_SX() do { \ sx_downgrade(&pmc_sx); \ is_sx_downgraded = true; \ } while (0) /* * Main body of PMC_OP_PMCALLOCATE. */ static int pmc_do_op_pmcallocate(struct thread *td, struct pmc_op_pmcallocate *pa) { struct proc *p; struct pmc *pmc; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_hw *phw; enum pmc_mode mode; enum pmc_class class; uint32_t caps, flags; u_int cpu; int adjri, n; int error; class = pa->pm_class; caps = pa->pm_caps; flags = pa->pm_flags; mode = pa->pm_mode; cpu = pa->pm_cpu; p = td->td_proc; /* Requested mode must exist. */ if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && mode != PMC_MODE_TS && mode != PMC_MODE_TC)) return (EINVAL); /* Requested CPU must be valid. */ if (cpu != PMC_CPU_ANY && cpu >= pmc_cpu_max()) return (EINVAL); /* * Virtual PMCs should only ask for a default CPU. * System mode PMCs need to specify a non-default CPU. */ if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != PMC_CPU_ANY) || (PMC_IS_SYSTEM_MODE(mode) && cpu == PMC_CPU_ANY)) return (EINVAL); /* * Check that an inactive CPU is not being asked for. */ if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) return (ENXIO); /* * Refuse an allocation for a system-wide PMC if this process has been * jailed, or if this process lacks super-user credentials and the * sysctl tunable 'security.bsd.unprivileged_syspmcs' is zero. */ if (PMC_IS_SYSTEM_MODE(mode)) { if (jailed(td->td_ucred)) return (EPERM); if (!pmc_unprivileged_syspmcs) { error = priv_check(td, PRIV_PMC_SYSTEM); if (error != 0) return (error); } } /* * Look for valid values for 'pm_flags'. */ if ((flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | - PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) != 0) + PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN | + PMC_F_EV_PMU)) != 0) return (EINVAL); /* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN. */ if ((flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) == PMC_F_USERCALLCHAIN) return (EINVAL); /* PMC_F_USERCALLCHAIN is only valid for sampling mode. */ if ((flags & PMC_F_USERCALLCHAIN) != 0 && mode != PMC_MODE_TS && mode != PMC_MODE_SS) return (EINVAL); /* Process logging options are not allowed for system PMCs. */ if (PMC_IS_SYSTEM_MODE(mode) && (flags & (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT)) != 0) return (EINVAL); /* * All sampling mode PMCs need to be able to interrupt the CPU. */ if (PMC_IS_SAMPLING_MODE(mode)) caps |= PMC_CAP_INTERRUPT; /* A valid class specifier should have been passed in. */ pcd = pmc_class_to_classdep(class); if (pcd == NULL) return (EINVAL); /* The requested PMC capabilities should be feasible. */ if ((pcd->pcd_caps & caps) != caps) return (EOPNOTSUPP); PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", pa->pm_ev, caps, mode, cpu); pmc = pmc_allocate_pmc_descriptor(); pmc->pm_id = PMC_ID_MAKE_ID(cpu, pa->pm_mode, class, PMC_ID_INVALID); pmc->pm_event = pa->pm_ev; pmc->pm_state = PMC_STATE_FREE; pmc->pm_caps = caps; pmc->pm_flags = flags; /* XXX set lower bound on sampling for process counters */ if (PMC_IS_SAMPLING_MODE(mode)) { /* * Don't permit requested sample rate to be less than * pmc_mincount. */ if (pa->pm_count < MAX(1, pmc_mincount)) log(LOG_WARNING, "pmcallocate: passed sample " "rate %ju - setting to %u\n", (uintmax_t)pa->pm_count, MAX(1, pmc_mincount)); pmc->pm_sc.pm_reloadcount = MAX(MAX(1, pmc_mincount), pa->pm_count); } else pmc->pm_sc.pm_initial = pa->pm_count; /* switch thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); #define PMC_IS_SHAREABLE_PMC(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ PMC_PHW_FLAG_IS_SHAREABLE) #define PMC_IS_UNALLOCATED(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) if (PMC_IS_SYSTEM_MODE(mode)) { pmc_select_cpu(cpu); for (n = pcd->pcd_ri; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (!pmc_can_allocate_row(n, mode) || !pmc_can_allocate_rowindex(p, n, cpu)) continue; if (!PMC_IS_UNALLOCATED(cpu, n) && !PMC_IS_SHAREABLE_PMC(cpu, n)) continue; if (pcd->pcd_allocate_pmc(cpu, adjri, pmc, pa) == 0) { /* Success. */ break; } } } else { /* Process virtual mode */ for (n = pcd->pcd_ri; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (!pmc_can_allocate_row(n, mode) || !pmc_can_allocate_rowindex(p, n, PMC_CPU_ANY)) continue; if (pcd->pcd_allocate_pmc(td->td_oncpu, adjri, pmc, pa) == 0) { /* Success. */ break; } } } #undef PMC_IS_UNALLOCATED #undef PMC_IS_SHAREABLE_PMC pmc_restore_cpu_binding(&pb); if (n == md->pmd_npmc) { pmc_destroy_pmc_descriptor(pmc); return (EINVAL); } /* Fill in the correct value in the ID field. */ pmc->pm_id = PMC_ID_MAKE_ID(cpu, mode, class, n); PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", pmc->pm_event, class, mode, n, pmc->pm_id); /* Process mode PMCs with logging enabled need log files. */ if ((pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) != 0) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* All system mode sampling PMCs require a log file. */ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* * Configure global pmc's immediately. */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; pcd = pmc_ri_to_classdep(md, n, &adjri); if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { (void)pcd->pcd_release_pmc(cpu, adjri, pmc); pmc_destroy_pmc_descriptor(pmc); pmc_restore_cpu_binding(&pb); return (EPERM); } pmc_restore_cpu_binding(&pb); } pmc->pm_state = PMC_STATE_ALLOCATED; pmc->pm_class = class; /* * Mark row disposition. */ if (PMC_IS_SYSTEM_MODE(mode)) PMC_MARK_ROW_STANDALONE(n); else PMC_MARK_ROW_THREAD(n); /* * Register this PMC with the current thread as its owner. */ error = pmc_register_owner(p, pmc); if (error != 0) { pmc_release_pmc_descriptor(pmc); pmc_destroy_pmc_descriptor(pmc); return (error); } /* * Return the allocated index. */ pa->pm_pmcid = pmc->pm_id; return (0); } /* * Main body of PMC_OP_PMCATTACH. */ static int pmc_do_op_pmcattach(struct thread *td, struct pmc_op_pmcattach a) { struct pmc *pm; struct proc *p; int error; sx_assert(&pmc_sx, SX_XLOCKED); if (a.pm_pid < 0) { return (EINVAL); } else if (a.pm_pid == 0) { a.pm_pid = td->td_proc->p_pid; } error = pmc_find_pmc(a.pm_pmc, &pm); if (error != 0) return (error); if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) return (EINVAL); /* PMCs may be (re)attached only when allocated or stopped */ if (pm->pm_state == PMC_STATE_RUNNING) { return (EBUSY); } else if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED) { return (EINVAL); } /* lookup pid */ if ((p = pfind(a.pm_pid)) == NULL) return (ESRCH); /* * Ignore processes that are working on exiting. */ if ((p->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(p); /* pfind() returns a locked process */ return (ESRCH); } /* * We are allowed to attach a PMC to a process if we can debug it. */ error = p_candebug(curthread, p); PROC_UNLOCK(p); if (error == 0) error = pmc_attach_process(p, pm); return (error); } /* * Main body of PMC_OP_PMCDETACH. */ static int pmc_do_op_pmcdetach(struct thread *td, struct pmc_op_pmcattach a) { struct pmc *pm; struct proc *p; int error; if (a.pm_pid < 0) { return (EINVAL); } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; error = pmc_find_pmc(a.pm_pmc, &pm); if (error != 0) return (error); if ((p = pfind(a.pm_pid)) == NULL) return (ESRCH); /* * Treat processes that are in the process of exiting as if they were * not present. */ if ((p->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(p); return (ESRCH); } PROC_UNLOCK(p); /* pfind() returns a locked process */ if (error == 0) error = pmc_detach_process(p, pm); return (error); } /* * Main body of PMC_OP_PMCRELEASE. */ static int pmc_do_op_pmcrelease(pmc_id_t pmcid) { struct pmc_owner *po; struct pmc *pm; int error; /* * Find PMC pointer for the named PMC. * * Use pmc_release_pmc_descriptor() to switch off the * PMC, remove all its target threads, and remove the * PMC from its owner's list. * * Remove the owner record if this is the last PMC * owned. * * Free up space. */ error = pmc_find_pmc(pmcid, &pm); if (error != 0) return (error); po = pm->pm_owner; pmc_release_pmc_descriptor(pm); pmc_maybe_remove_owner(po); pmc_destroy_pmc_descriptor(pm); return (error); } /* * Main body of PMC_OP_PMCRW. */ static int pmc_do_op_pmcrw(const struct pmc_op_pmcrw *prw, pmc_value_t *valp) { struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc *pm; u_int cpu, ri, adjri; int error; PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw->pm_pmcid, prw->pm_flags); /* Must have at least one flag set. */ if ((prw->pm_flags & (PMC_F_OLDVALUE | PMC_F_NEWVALUE)) == 0) return (EINVAL); /* Locate PMC descriptor. */ error = pmc_find_pmc(prw->pm_pmcid, &pm); if (error != 0) return (error); /* Can't read a PMC that hasn't been started. */ if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_RUNNING) return (EINVAL); /* Writing a new value is allowed only for 'STOPPED' PMCs. */ if (pm->pm_state == PMC_STATE_RUNNING && (prw->pm_flags & PMC_F_NEWVALUE) != 0) return (EBUSY); if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { /* * If this PMC is attached to its owner (i.e., the process * requesting this operation) and is running, then attempt to * get an upto-date reading from hardware for a READ. Writes * are only allowed when the PMC is stopped, so only update the * saved value field. * * If the PMC is not running, or is not attached to its owner, * read/write to the savedvalue field. */ ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mtx_pool_lock_spin(pmc_mtxpool, pm); cpu = curthread->td_oncpu; if ((prw->pm_flags & PMC_F_OLDVALUE) != 0) { if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && (pm->pm_state == PMC_STATE_RUNNING)) { error = (*pcd->pcd_read_pmc)(cpu, adjri, pm, valp); } else { *valp = pm->pm_gv.pm_savedvalue; } } if ((prw->pm_flags & PMC_F_NEWVALUE) != 0) pm->pm_gv.pm_savedvalue = prw->pm_value; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* System mode PMCs */ cpu = PMC_TO_CPU(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); if (!pmc_cpu_is_active(cpu)) return (ENXIO); /* Move this thread to CPU 'cpu'. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); critical_enter(); /* Save old value. */ if ((prw->pm_flags & PMC_F_OLDVALUE) != 0) error = (*pcd->pcd_read_pmc)(cpu, adjri, pm, valp); /* Write out new value. */ if (error == 0 && (prw->pm_flags & PMC_F_NEWVALUE) != 0) error = (*pcd->pcd_write_pmc)(cpu, adjri, pm, prw->pm_value); critical_exit(); pmc_restore_cpu_binding(&pb); if (error != 0) return (error); } #ifdef HWPMC_DEBUG if ((prw->pm_flags & PMC_F_NEWVALUE) != 0) PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx", ri, prw->pm_value, *valp); else PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, *valp); #endif return (error); } static int pmc_syscall_handler(struct thread *td, void *syscall_args) { struct pmc_syscall_args *c; void *pmclog_proc_handle; void *arg; int error, op; bool is_sx_downgraded; c = (struct pmc_syscall_args *)syscall_args; op = c->pmop_code; arg = c->pmop_data; /* PMC isn't set up yet */ if (pmc_hook == NULL) return (EINVAL); if (op == PMC_OP_CONFIGURELOG) { /* * We cannot create the logging process inside * pmclog_configure_log() because there is a LOR * between pmc_sx and process structure locks. * Instead, pre-create the process and ignite the loop * if everything is fine, otherwise direct the process * to exit. */ error = pmclog_proc_create(td, &pmclog_proc_handle); if (error != 0) goto done_syscall; } PMC_GET_SX_XLOCK(ENOSYS); is_sx_downgraded = false; PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); error = 0; counter_u64_add(pmc_stats.pm_syscalls, 1); switch (op) { /* * Configure a log file. * * XXX This OP will be reworked. */ case PMC_OP_CONFIGURELOG: { struct proc *p; struct pmc *pm; struct pmc_owner *po; struct pmc_op_configurelog cl; if ((error = copyin(arg, &cl, sizeof(cl))) != 0) { pmclog_proc_ignite(pmclog_proc_handle, NULL); break; } /* No flags currently implemented */ if (cl.pm_flags != 0) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = EINVAL; break; } /* mark this process as owning a log file */ p = td->td_proc; if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = ENOMEM; break; } /* * If a valid fd was passed in, try to configure that, * otherwise if 'fd' was less than zero and there was * a log file configured, flush its buffers and * de-configure it. */ if (cl.pm_logfd >= 0) { error = pmclog_configure_log(md, po, cl.pm_logfd); pmclog_proc_ignite(pmclog_proc_handle, error == 0 ? po : NULL); } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = pmclog_close(po); if (error == 0) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && pm->pm_state == PMC_STATE_RUNNING) pmc_stop(pm); error = pmclog_deconfigure_log(po); } } else { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = EINVAL; } } break; /* * Flush a log file. */ case PMC_OP_FLUSHLOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_flush(po, 0); } break; /* * Close a log file. */ case PMC_OP_CLOSELOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_close(po); } break; /* * Retrieve hardware configuration. */ case PMC_OP_GETCPUINFO: /* CPU information */ { struct pmc_op_getcpuinfo gci; struct pmc_classinfo *pci; struct pmc_classdep *pcd; int cl; memset(&gci, 0, sizeof(gci)); gci.pm_cputype = md->pmd_cputype; gci.pm_ncpu = pmc_cpu_max(); gci.pm_npmc = md->pmd_npmc; gci.pm_nclass = md->pmd_nclass; pci = gci.pm_classes; pcd = md->pmd_classdep; for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { pci->pm_caps = pcd->pcd_caps; pci->pm_class = pcd->pcd_class; pci->pm_width = pcd->pcd_width; pci->pm_num = pcd->pcd_num; } error = copyout(&gci, arg, sizeof(gci)); } break; /* * Retrieve soft events list. */ case PMC_OP_GETDYNEVENTINFO: { enum pmc_class cl; enum pmc_event ev; struct pmc_op_getdyneventinfo *gei; struct pmc_dyn_event_descr dev; struct pmc_soft *ps; uint32_t nevent; sx_assert(&pmc_sx, SX_LOCKED); gei = (struct pmc_op_getdyneventinfo *) arg; if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) break; /* Only SOFT class is dynamic. */ if (cl != PMC_CLASS_SOFT) { error = EINVAL; break; } nevent = 0; for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { ps = pmc_soft_ev_acquire(ev); if (ps == NULL) continue; bcopy(&ps->ps_ev, &dev, sizeof(dev)); pmc_soft_ev_release(ps); error = copyout(&dev, &gei->pm_events[nevent], sizeof(struct pmc_dyn_event_descr)); if (error != 0) break; nevent++; } if (error != 0) break; error = copyout(&nevent, &gei->pm_nevent, sizeof(nevent)); } break; /* * Get module statistics */ case PMC_OP_GETDRIVERSTATS: { struct pmc_op_getdriverstats gms; #define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field) CFETCH(gms, pmc_stats, pm_intr_ignored); CFETCH(gms, pmc_stats, pm_intr_processed); CFETCH(gms, pmc_stats, pm_intr_bufferfull); CFETCH(gms, pmc_stats, pm_syscalls); CFETCH(gms, pmc_stats, pm_syscall_errors); CFETCH(gms, pmc_stats, pm_buffer_requests); CFETCH(gms, pmc_stats, pm_buffer_requests_failed); CFETCH(gms, pmc_stats, pm_log_sweeps); #undef CFETCH error = copyout(&gms, arg, sizeof(gms)); } break; /* * Retrieve module version number */ case PMC_OP_GETMODULEVERSION: { uint32_t cv, modv; /* retrieve the client's idea of the ABI version */ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) break; /* don't service clients newer than our driver */ modv = PMC_VERSION; if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { error = EPROGMISMATCH; break; } error = copyout(&modv, arg, sizeof(int)); } break; /* * Retrieve the state of all the PMCs on a given * CPU. */ case PMC_OP_GETPMCINFO: { int ari; struct pmc *pm; size_t pmcinfo_size; uint32_t cpu, n, npmc; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_info *p, *pmcinfo; struct pmc_op_getpmcinfo *gpi; PMC_DOWNGRADE_SX(); gpi = (struct pmc_op_getpmcinfo *) arg; if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) break; if (cpu >= pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* switch to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); npmc = md->pmd_npmc; pmcinfo_size = npmc * sizeof(struct pmc_info); pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK | M_ZERO); p = pmcinfo; for (n = 0; n < md->pmd_npmc; n++, p++) { pcd = pmc_ri_to_classdep(md, n, &ari); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) break; if (PMC_ROW_DISP_IS_STANDALONE(n)) p->pm_rowdisp = PMC_DISP_STANDALONE; else if (PMC_ROW_DISP_IS_THREAD(n)) p->pm_rowdisp = PMC_DISP_THREAD; else p->pm_rowdisp = PMC_DISP_FREE; p->pm_ownerpid = -1; if (pm == NULL) /* no PMC associated */ continue; po = pm->pm_owner; KASSERT(po->po_owner != NULL, ("[pmc,%d] pmc_owner had a null proc pointer", __LINE__)); p->pm_ownerpid = po->po_owner->p_pid; p->pm_mode = PMC_TO_MODE(pm); p->pm_event = pm->pm_event; p->pm_flags = pm->pm_flags; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) p->pm_reloadcount = pm->pm_sc.pm_reloadcount; } pmc_restore_cpu_binding(&pb); /* now copy out the PMC info collected */ if (error == 0) error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); free(pmcinfo, M_PMC); } break; /* * Set the administrative state of a PMC. I.e. whether * the PMC is to be used or not. */ case PMC_OP_PMCADMIN: { int cpu, ri; enum pmc_state request; struct pmc_cpu *pc; struct pmc_hw *phw; struct pmc_op_pmcadmin pma; struct pmc_binding pb; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); error = priv_check(td, PRIV_PMC_MANAGE); if (error) break; if ((error = copyin(arg, &pma, sizeof(pma))) != 0) break; cpu = pma.pm_cpu; if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } request = pma.pm_state; if (request != PMC_STATE_DISABLED && request != PMC_STATE_FREE) { error = EINVAL; break; } ri = pma.pm_pmc; /* pmc id == row index */ if (ri < 0 || ri >= (int) md->pmd_npmc) { error = EINVAL; break; } /* * We can't disable a PMC with a row-index allocated * for process virtual PMCs. */ if (PMC_ROW_DISP_IS_THREAD(ri) && request == PMC_STATE_DISABLED) { error = EBUSY; break; } /* * otherwise, this PMC on this CPU is either free or * in system-wide mode. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); pc = pmc_pcpu[cpu]; phw = pc->pc_hwpmcs[ri]; /* * XXX do we need some kind of 'forced' disable? */ if (phw->phw_pmc == NULL) { if (request == PMC_STATE_DISABLED && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; PMC_MARK_ROW_STANDALONE(ri); } else if (request == PMC_STATE_FREE && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; PMC_UNMARK_ROW_STANDALONE(ri); } /* other cases are a no-op */ } else error = EBUSY; pmc_restore_cpu_binding(&pb); } break; /* * Allocate a PMC. */ case PMC_OP_PMCALLOCATE: { struct pmc_op_pmcallocate pa; error = copyin(arg, &pa, sizeof(pa)); if (error != 0) break; error = pmc_do_op_pmcallocate(td, &pa); if (error != 0) break; error = copyout(&pa, arg, sizeof(pa)); } break; /* * Attach a PMC to a process. */ case PMC_OP_PMCATTACH: { struct pmc_op_pmcattach a; error = copyin(arg, &a, sizeof(a)); if (error != 0) break; error = pmc_do_op_pmcattach(td, a); } break; /* * Detach an attached PMC from a process. */ case PMC_OP_PMCDETACH: { struct pmc_op_pmcattach a; error = copyin(arg, &a, sizeof(a)); if (error != 0) break; error = pmc_do_op_pmcdetach(td, a); } break; /* * Retrieve the MSR number associated with the counter * 'pmc_id'. This allows processes to directly use RDPMC * instructions to read their PMCs, without the overhead of a * system call. */ case PMC_OP_PMCGETMSR: { int adjri, ri; struct pmc *pm; struct pmc_target *pt; struct pmc_op_getmsr gm; struct pmc_classdep *pcd; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &gm, sizeof(gm))) != 0) break; if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) break; /* * The allocated PMC has to be a process virtual PMC, * i.e., of type MODE_T[CS]. Global PMCs can only be * read using the PMCREAD operation since they may be * allocated on a different CPU than the one we could * be running on at the time of the RDPMC instruction. * * The GETMSR operation is not allowed for PMCs that * are inherited across processes. */ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || (pm->pm_flags & PMC_F_DESCENDANTS)) { error = EINVAL; break; } /* * It only makes sense to use a RDPMC (or its * equivalent instruction on non-x86 architectures) on * a process that has allocated and attached a PMC to * itself. Conversely the PMC is only allowed to have * one process attached to it -- its owner. */ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || LIST_NEXT(pt, pt_next) != NULL || pt->pt_process->pp_proc != pm->pm_owner->po_owner) { error = EINVAL; break; } ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); /* PMC class has no 'GETMSR' support */ if (pcd->pcd_get_msr == NULL) { error = ENOSYS; break; } if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) break; if ((error = copyout(&gm, arg, sizeof(gm))) < 0) break; /* * Mark our process as using MSRs. Update machine * state using a forced context switch. */ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; pmc_force_context_switch(); } break; /* * Release an allocated PMC. */ case PMC_OP_PMCRELEASE: { struct pmc_op_simple sp; error = copyin(arg, &sp, sizeof(sp)); if (error != 0) break; error = pmc_do_op_pmcrelease(sp.pm_pmcid); } break; /* * Read and/or write a PMC. */ case PMC_OP_PMCRW: { struct pmc_op_pmcrw prw; struct pmc_op_pmcrw *pprw; pmc_value_t oldvalue; PMC_DOWNGRADE_SX(); error = copyin(arg, &prw, sizeof(prw)); if (error != 0) break; error = pmc_do_op_pmcrw(&prw, &oldvalue); if (error != 0) break; /* Return old value if requested. */ if ((prw.pm_flags & PMC_F_OLDVALUE) != 0) { pprw = arg; error = copyout(&oldvalue, &pprw->pm_value, sizeof(prw.pm_value)); } } break; /* * Set the sampling rate for a sampling mode PMC and the * initial count for a counting mode PMC. */ case PMC_OP_PMCSETCOUNT: { struct pmc *pm; struct pmc_op_pmcsetcount sc; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sc, sizeof(sc))) != 0) break; if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) break; if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { /* * Don't permit requested sample rate to be * less than pmc_mincount. */ if (sc.pm_count < MAX(1, pmc_mincount)) log(LOG_WARNING, "pmcsetcount: passed sample " "rate %ju - setting to %u\n", (uintmax_t)sc.pm_count, MAX(1, pmc_mincount)); pm->pm_sc.pm_reloadcount = MAX(MAX(1, pmc_mincount), sc.pm_count); } else pm->pm_sc.pm_initial = sc.pm_count; } break; /* * Start a PMC. */ case PMC_OP_PMCSTART: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmcid %x != id %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ break; else if (pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_ALLOCATED) { error = EINVAL; break; } error = pmc_start(pm); } break; /* * Stop a PMC. */ case PMC_OP_PMCSTOP: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; /* * Mark the PMC as inactive and invoke the MD stop * routines if needed. */ if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ break; else if (pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } error = pmc_stop(pm); } break; /* * Write a user supplied value to the log file. */ case PMC_OP_WRITELOG: { struct pmc_op_writelog wl; struct pmc_owner *po; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &wl, sizeof(wl))) != 0) break; if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; break; } error = pmclog_process_userlog(po, &wl); } break; default: error = EINVAL; break; } if (is_sx_downgraded) sx_sunlock(&pmc_sx); else sx_xunlock(&pmc_sx); done_syscall: if (error) counter_u64_add(pmc_stats.pm_syscall_errors, 1); return (error); } /* * Helper functions */ /* * Mark the thread as needing callchain capture and post an AST. The * actual callchain capture will be done in a context where it is safe * to take page faults. */ static void pmc_post_callchain_callback(void) { struct thread *td; td = curthread; /* * If there is multiple PMCs for the same interrupt ignore new post */ if ((td->td_pflags & TDP_CALLCHAIN) != 0) return; /* * Mark this thread as needing callchain capture. * `td->td_pflags' will be safe to touch because this thread * was in user space when it was interrupted. */ td->td_pflags |= TDP_CALLCHAIN; /* * Don't let this thread migrate between CPUs until callchain * capture completes. */ sched_pin(); return; } /* * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ static int pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf) { struct pmc_sample *ps; struct pmc_samplebuffer *psb; struct thread *td; int error, cpu, callchaindepth; bool inuserspace; error = 0; /* * Allocate space for a sample buffer. */ cpu = curcpu; psb = pmc_pcpu[cpu]->pc_sb[ring]; inuserspace = TRAPF_USERMODE(tf); ps = PMC_PROD_SAMPLE(psb); if (psb->ps_considx != psb->ps_prodidx && ps->ps_nsamples) { /* in use, reader hasn't caught up */ pm->pm_pcpu_state[cpu].pps_stalled = 1; counter_u64_add(pmc_stats.pm_intr_bufferfull, 1); PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, tf, inuserspace, (int)(psb->ps_prodidx & pmc_sample_mask), (int)(psb->ps_considx & pmc_sample_mask)); callchaindepth = 1; error = ENOMEM; goto done; } /* Fill in entry. */ PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, tf, inuserspace, (int)(psb->ps_prodidx & pmc_sample_mask), (int)(psb->ps_considx & pmc_sample_mask)); td = curthread; ps->ps_pmc = pm; ps->ps_td = td; ps->ps_pid = td->td_proc->p_pid; ps->ps_tid = td->td_tid; ps->ps_tsc = pmc_rdtsc(); ps->ps_ticks = ticks; ps->ps_cpu = cpu; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; MPASS(ps->ps_pc != NULL); if (callchaindepth == 1) { ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); } else { /* * Kernel stack traversals can be done immediately, while we * defer to an AST for user space traversals. */ if (!inuserspace) { callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, callchaindepth, tf); } else { pmc_post_callchain_callback(); callchaindepth = PMC_USER_CALLCHAIN_PENDING; } } ps->ps_nsamples = callchaindepth; /* mark entry as in-use */ if (ring == PMC_UR) { ps->ps_nsamples_actual = callchaindepth; ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING; } KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0, ("[pmc,%d] pm=%p runcount %ju", __LINE__, pm, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */ /* increment write pointer */ psb->ps_prodidx++; done: /* mark CPU as needing processing */ if (callchaindepth != PMC_USER_CALLCHAIN_PENDING) DPCPU_SET(pmc_sampled, 1); return (error); } /* * Interrupt processing. * * This function may be called from an NMI handler. It cannot use any of the * locking primitives supplied by the OS. */ int pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf) { struct thread *td; td = curthread; if ((pm->pm_flags & PMC_F_USERCALLCHAIN) && (td->td_proc->p_flag & P_KPROC) == 0 && !TRAPF_USERMODE(tf)) { atomic_add_int(&td->td_pmcpend, 1); return (pmc_add_sample(PMC_UR, pm, tf)); } return (pmc_add_sample(ring, pm, tf)); } /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets * rescheduled. */ static void pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) { struct pmc *pm; struct pmc_sample *ps; struct pmc_samplebuffer *psb; struct thread *td; uint64_t considx, prodidx; int nsamples, nrecords, pass, iter; int start_ticks __diagused; psb = pmc_pcpu[cpu]->pc_sb[ring]; td = curthread; nrecords = INT_MAX; pass = 0; start_ticks = ticks; KASSERT(td->td_pflags & TDP_CALLCHAIN, ("[pmc,%d] Retrieving callchain for thread that doesn't want it", __LINE__)); restart: if (ring == PMC_UR) nrecords = atomic_readandclear_32(&td->td_pmcpend); for (iter = 0, considx = psb->ps_considx, prodidx = psb->ps_prodidx; considx < prodidx && iter < pmc_nsamples; considx++, iter++) { ps = PMC_CONS_SAMPLE_OFF(psb, considx); /* * Iterate through all deferred callchain requests. Walk from * the current read pointer to the current write pointer. */ #ifdef INVARIANTS if (ps->ps_nsamples == PMC_SAMPLE_FREE) { continue; } #endif if (ps->ps_td != td || ps->ps_nsamples != PMC_USER_CALLCHAIN_PENDING || ps->ps_pmc->pm_state != PMC_STATE_RUNNING) continue; KASSERT(ps->ps_cpu == cpu, ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, ps->ps_cpu, PCPU_GET(cpuid))); pm = ps->ps_pmc; KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, ("[pmc,%d] Retrieving callchain for PMC that doesn't " "want it", __LINE__)); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] runcount %ju", __LINE__, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); if (ring == PMC_UR) { nsamples = ps->ps_nsamples_actual; counter_u64_add(pmc_stats.pm_merges, 1); } else nsamples = 0; /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. */ if (__predict_true(nsamples < pmc_callchaindepth - 1)) nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples, pmc_callchaindepth - nsamples - 1, tf); /* * We have to prevent hardclock from potentially overwriting * this sample between when we read the value and when we set * it. */ spinlock_enter(); /* * Verify that the sample hasn't been dropped in the meantime. */ if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) { ps->ps_nsamples = nsamples; /* * If we couldn't get a sample, simply drop the * reference. */ if (nsamples == 0) counter_u64_add(pm->pm_runcount, -1); } spinlock_exit(); if (nrecords-- == 1) break; } if (__predict_false(ring == PMC_UR && td->td_pmcpend)) { if (pass == 0) { pass = 1; goto restart; } /* only collect samples for this part once */ td->td_pmcpend = 0; } #ifdef INVARIANTS if ((ticks - start_ticks) > hz) log(LOG_ERR, "%s took %d ticks\n", __func__, (ticks - start_ticks)); #endif /* mark CPU as needing processing */ DPCPU_SET(pmc_sampled, 1); } /* * Process saved PC samples. */ static void pmc_process_samples(int cpu, ring_type_t ring) { struct pmc *pm; struct thread *td; struct pmc_owner *po; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *psb; uint64_t delta __diagused; int adjri, n; KASSERT(PCPU_GET(cpuid) == cpu, ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, PCPU_GET(cpuid), cpu)); psb = pmc_pcpu[cpu]->pc_sb[ring]; delta = psb->ps_prodidx - psb->ps_considx; MPASS(delta <= pmc_nsamples); MPASS(psb->ps_considx <= psb->ps_prodidx); for (n = 0; psb->ps_considx < psb->ps_prodidx; psb->ps_considx++, n++) { ps = PMC_CONS_SAMPLE(psb); if (__predict_false(ps->ps_nsamples == PMC_SAMPLE_FREE)) continue; /* skip non-running samples */ pm = ps->ps_pmc; if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] pm=%p runcount %ju", __LINE__, pm, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, pm, PMC_TO_MODE(pm))); po = pm->pm_owner; /* If there is a pending AST wait for completion */ if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) { /* * If we've been waiting more than 1 tick to * collect a callchain for this record then * drop it and move on. */ if (ticks - ps->ps_ticks > 1) { /* * Track how often we hit this as it will * preferentially lose user samples * for long running system calls. */ counter_u64_add(pmc_stats.pm_overwrites, 1); goto entrydone; } /* Need a rescan at a later time. */ DPCPU_SET(pmc_sampled, 1); break; } PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, pm, ps->ps_nsamples, ps->ps_flags, (int)(psb->ps_prodidx & pmc_sample_mask), (int)(psb->ps_considx & pmc_sample_mask)); /* * If this is a process-mode PMC that is attached to * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. * * Otherwise, this is either a sampling-mode PMC that * is attached to a different process than its owner, * or a system-wide sampling PMC. Dispatch a log * entry to the PMC's owner process. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); addupc_intr(td, ps->ps_pc[0], 1); } } else pmclog_process_callchain(pm, ps); entrydone: ps->ps_nsamples = 0; /* mark entry as free */ KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] pm=%p runcount %ju", __LINE__, pm, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); counter_u64_add(pm->pm_runcount, -1); } counter_u64_add(pmc_stats.pm_log_sweeps, 1); /* Do not re-enable stalled PMCs if we failed to process any samples */ if (n == 0) return; /* * Restart any stalled sampling PMCs on this CPU. * * If the NMI handler sets the pm_stalled field of a PMC after * the check below, we'll end up processing the stalled PMC at * the next hardclock tick. */ for (n = 0; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); (void)(*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL || /* !cfg'ed */ pm->pm_state != PMC_STATE_RUNNING || /* !active */ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ !pm->pm_pcpu_state[cpu].pps_cpustate || /* !desired */ !pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */ continue; pm->pm_pcpu_state[cpu].pps_stalled = 0; (void)(*pcd->pcd_start_pmc)(cpu, adjri, pm); } } /* * Event handlers. */ /* * Handle a process exit. * * Remove this process from all hash tables. If this process * owned any PMCs, turn off those PMCs and deallocate them, * removing any associations with target processes. * * This function will be called by the last 'thread' of a * process. * * XXX This eventhandler gets called early in the exit process. * Consider using a 'hook' invocation from thread_exit() or equivalent * spot. Another negative is that kse_exit doesn't seem to call * exit1() [??]. */ static void pmc_process_exit(void *arg __unused, struct proc *p) { struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; struct pmc_classdep *pcd; pmc_value_t newvalue, tmp; int ri, adjri, cpu; bool is_using_hwpmcs; PROC_LOCK(p); is_using_hwpmcs = (p->p_flag & P_HWPMC) != 0; PROC_UNLOCK(p); /* * Log a sysexit event to all SS PMC owners. */ PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) pmclog_process_sysexit(po, p->p_pid); } PMC_EPOCH_EXIT(); PMC_GET_SX_XLOCK(); PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); if (!is_using_hwpmcs) goto out; /* * Since this code is invoked by the last thread in an exiting process, * we would have context switched IN at some prior point. However, with * PREEMPTION, kernel mode context switches may happen any time, so we * want to disable a context switch OUT till we get any PMCs targeting * this process off the hardware. * * We also need to atomically remove this process' entry from our * target process hash table, using PMC_FLAG_REMOVE. */ PMCDBG3(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); critical_enter(); /* no preemption */ cpu = curthread->td_oncpu; pp = pmc_find_process_descriptor(p, PMC_FLAG_REMOVE); if (pp == NULL) { critical_exit(); goto out; } PMCDBG2(PRC,EXT,2, "process-exit proc=%p pmc-process=%p", p, pp); /* * The exiting process could be the target of some PMCs which will be * running on currently executing CPU. * * We need to turn these PMCs off like we would do at context switch * OUT time. */ for (ri = 0; ri < md->pmd_npmc; ri++) { /* * Pick up the pmc pointer from hardware state similar to the * CSW_OUT code. */ pm = NULL; pcd = pmc_ri_to_classdep(md, ri, &adjri); (void)(*pcd->pcd_get_config)(cpu, adjri, &pm); PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm); if (pm == NULL || !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) continue; PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p state=%d", ri, pp->pp_pmcs[ri].pp_pmc, pm, pm->pm_state); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] bad runcount ri %d rc %ju", __LINE__, ri, (uintmax_t)counter_u64_fetch(pm->pm_runcount))); /* * Change desired state, and then stop if not stalled. This * two-step dance should avoid race conditions where an * interrupt re-enables the PMC after this code has already * checked the pm_stalled flag. */ if (pm->pm_pcpu_state[cpu].pps_cpustate) { pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (!pm->pm_pcpu_state[cpu].pps_stalled) { (void)pcd->pcd_stop_pmc(cpu, adjri, pm); if (PMC_TO_MODE(pm) == PMC_MODE_TC) { pcd->pcd_read_pmc(cpu, adjri, pm, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu, ri); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); } } } KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] runcount is %d", __LINE__, ri)); counter_u64_add(pm->pm_runcount, -1); (void)pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Inform the MD layer of this pseudo "context switch out". */ (void)md->pmd_switch_out(pmc_pcpu[cpu], pp); critical_exit(); /* ok to be pre-empted now */ /* * Unlink this process from the PMCs that are targeting it. This will * send a signal to all PMC owner's whose PMCs are orphaned. * * Log PMC value at exit time if requested. */ for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) != 0 && PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) { pmclog_process_procexit(pm, pp); } pmc_unlink_target_process(pm, pp); } } free(pp, M_PMC); out: /* * If the process owned PMCs, free them up and free up memory. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) pmclog_close(po); pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } sx_xunlock(&pmc_sx); } /* * Handle a process fork. * * If the parent process 'p1' is under HWPMC monitoring, then copy * over any attached PMCs that have 'do_descendants' semantics. */ static void pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, int flags __unused) { struct pmc *pm; struct pmc_owner *po; struct pmc_process *ppnew, *ppold; unsigned int ri; bool is_using_hwpmcs, do_descendants; PROC_LOCK(p1); is_using_hwpmcs = (p1->p_flag & P_HWPMC) != 0; PROC_UNLOCK(p1); /* * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs. */ PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); pmclog_process_proccreate(po, newproc, 1); } } PMC_EPOCH_EXIT(); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, p1->p_pid, p1->p_comm, newproc); /* * If the parent process (curthread->td_proc) is a * target of any PMCs, look for PMCs that are to be * inherited, and link these into the new process * descriptor. */ ppold = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE); if (ppold == NULL) goto done; /* nothing to do */ do_descendants = false; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS) != 0) { do_descendants = true; break; } } if (!do_descendants) /* nothing to do */ goto done; /* * Now mark the new process as being tracked by this driver. */ PROC_LOCK(newproc); newproc->p_flag |= P_HWPMC; PROC_UNLOCK(newproc); /* Allocate a descriptor for the new process. */ ppnew = pmc_find_process_descriptor(newproc, PMC_FLAG_ALLOCATE); if (ppnew == NULL) goto done; /* * Run through all PMCs that were targeting the old process * and which specified F_DESCENDANTS and attach them to the * new process. * * Log the fork event to all owners of PMCs attached to this * process, if not already logged. */ for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS) != 0) { pmc_link_target_process(pm, ppnew); po = pm->pm_owner; if (po->po_sscount == 0 && (po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) { pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); } } } done: sx_xunlock(&pmc_sx); } static void pmc_process_threadcreate(struct thread *td) { struct pmc_owner *po; PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) pmclog_process_threadcreate(po, td, 1); } PMC_EPOCH_EXIT(); } static void pmc_process_threadexit(struct thread *td) { struct pmc_owner *po; PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) pmclog_process_threadexit(po, td); } PMC_EPOCH_EXIT(); } static void pmc_process_proccreate(struct proc *p) { struct pmc_owner *po; PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) pmclog_process_proccreate(po, p, 1 /* sync */); } PMC_EPOCH_EXIT(); } static void pmc_process_allproc(struct pmc *pm) { struct pmc_owner *po; struct thread *td; struct proc *p; po = pm->pm_owner; if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { pmclog_process_proccreate(po, p, 0 /* sync */); PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td) pmclog_process_threadcreate(po, td, 0 /* sync */); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); pmclog_flush(po, 0); } static void pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; /* * Notify owners of system sampling PMCs about KLD operations. */ PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->pathname); } PMC_EPOCH_EXIT(); /* * TODO: Notify owners of (all) process-sampling PMCs too. */ } static void pmc_kld_unload(void *arg __unused, const char *filename __unused, caddr_t address, size_t size) { struct pmc_owner *po; PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) { if ((po->po_flags & PMC_PO_OWNS_LOGFILE) != 0) { pmclog_process_map_out(po, (pid_t)-1, (uintfptr_t)address, (uintfptr_t)address + size); } } PMC_EPOCH_EXIT(); /* * TODO: Notify owners of process-sampling PMCs. */ } /* * initialization */ static const char * pmc_name_of_pmcclass(enum pmc_class class) { switch (class) { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) \ case PMC_CLASS_##S: \ return #S; __PMC_CLASSES(); default: return (""); } } /* * Base class initializer: allocate structure and set default classes. */ struct pmc_mdep * pmc_mdep_alloc(int nclasses) { struct pmc_mdep *md; int n; /* SOFT + md classes */ n = 1 + nclasses; md = malloc(sizeof(struct pmc_mdep) + n * sizeof(struct pmc_classdep), M_PMC, M_WAITOK | M_ZERO); md->pmd_nclass = n; /* Default methods */ md->pmd_switch_in = generic_switch_in; md->pmd_switch_out = generic_switch_out; /* Add base class. */ pmc_soft_initialize(md); return (md); } void pmc_mdep_free(struct pmc_mdep *md) { pmc_soft_finalize(md); free(md, M_PMC); } static int generic_switch_in(struct pmc_cpu *pc __unused, struct pmc_process *pp __unused) { return (0); } static int generic_switch_out(struct pmc_cpu *pc __unused, struct pmc_process *pp __unused) { return (0); } static struct pmc_mdep * pmc_generic_cpu_initialize(void) { struct pmc_mdep *md; md = pmc_mdep_alloc(0); md->pmd_cputype = PMC_CPU_GENERIC; return (md); } static void pmc_generic_cpu_finalize(struct pmc_mdep *md __unused) { } static int pmc_initialize(void) { struct pcpu *pc; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_sample *ps; struct pmc_samplebuffer *sb; int c, cpu, error, n, ri; u_int maxcpu, domain; md = NULL; error = 0; pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK); pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK); pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK); pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK); pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK); pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK); #ifdef HWPMC_DEBUG /* parse debug flags first */ if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr))) { pmc_debugflags_parse(pmc_debugstr, pmc_debugstr + strlen(pmc_debugstr)); } #endif PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); /* check kernel version */ if (pmc_kernel_version != PMC_VERSION) { if (pmc_kernel_version == 0) printf("hwpmc: this kernel has not been compiled with " "'options HWPMC_HOOKS'.\n"); else printf("hwpmc: kernel version (0x%x) does not match " "module version (0x%x).\n", pmc_kernel_version, PMC_VERSION); return (EPROGMISMATCH); } /* * check sysctl parameters */ if (pmc_hashsize <= 0) { printf("hwpmc: tunable \"hashsize\"=%d must be " "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { printf("hwpmc: tunable \"nsamples\"=%d out of " "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } pmc_sample_mask = pmc_nsamples - 1; if (pmc_callchaindepth <= 0 || pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { printf("hwpmc: tunable \"callchaindepth\"=%d out of " "range - using %d.\n", pmc_callchaindepth, PMC_CALLCHAIN_DEPTH_MAX); pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; } md = pmc_md_initialize(); if (md == NULL) { /* Default to generic CPU. */ md = pmc_generic_cpu_initialize(); if (md == NULL) return (ENOSYS); } /* * Refresh classes base ri. Optional classes may come in different * order. */ for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; pcd->pcd_ri = ri; ri += pcd->pcd_num; } KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); /* Compute the map from row-indices to classdep pointers. */ pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * md->pmd_npmc, M_PMC, M_WAITOK | M_ZERO); for (n = 0; n < md->pmd_npmc; n++) pmc_rowindex_to_classdep[n] = NULL; for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; for (n = 0; n < pcd->pcd_num; n++, ri++) pmc_rowindex_to_classdep[ri] = pcd; } KASSERT(ri == md->pmd_npmc, ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, ri, md->pmd_npmc)); maxcpu = pmc_cpu_max(); /* allocate space for the per-cpu array */ pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, M_WAITOK | M_ZERO); /* per-cpu 'saved values' for managing process-mode PMCs */ pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK); /* Perform CPU-dependent initialization. */ pmc_save_cpu_binding(&pb); error = 0; for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pmc_select_cpu(cpu); pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, M_WAITOK | M_ZERO); for (n = 0; error == 0 && n < md->pmd_nclass; n++) if (md->pmd_classdep[n].pcd_num > 0) error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); } pmc_restore_cpu_binding(&pb); if (error != 0) return (error); /* allocate space for the sample array */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pc = pcpu_find(cpu); domain = pc->pc_domain; sb = malloc_domainset(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, DOMAINSET_PREF(domain), M_WAITOK | M_ZERO); KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domainset(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, DOMAINSET_PREF(domain), M_WAITOK | M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; sb = malloc_domainset(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, DOMAINSET_PREF(domain), M_WAITOK | M_ZERO); sb->ps_callchains = malloc_domainset(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, DOMAINSET_PREF(domain), M_WAITOK | M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; sb = malloc_domainset(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, DOMAINSET_PREF(domain), M_WAITOK | M_ZERO); sb->ps_callchains = malloc_domainset(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, DOMAINSET_PREF(domain), M_WAITOK | M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + n * pmc_callchaindepth; pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb; } /* allocate space for the row disposition array */ pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, M_PMC, M_WAITOK | M_ZERO); /* mark all PMCs as available */ for (n = 0; n < md->pmd_npmc; n++) PMC_MARK_ROW_FREE(n); /* allocate thread hash tables */ pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, &pmc_ownerhashmask); pmc_processhash = hashinit(pmc_hashsize, M_PMC, &pmc_processhashmask); mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", MTX_SPIN); CK_LIST_INIT(&pmc_ss_owners); pmc_ss_count = 0; /* allocate a pool of spin mutexes */ pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, MTX_SPIN); PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, pmc_processhash, pmc_processhashmask); /* Initialize a spin mutex for the thread free list. */ mtx_init(&pmc_threadfreelist_mtx, "pmc-threadfreelist", "pmc-leaf", MTX_SPIN); /* Initialize the task to prune the thread free list. */ TASK_INIT(&free_task, 0, pmc_thread_descriptor_pool_free_task, NULL); /* register process {exit,fork,exec} handlers */ pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); /* register kld event handlers */ pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, NULL, EVENTHANDLER_PRI_ANY); pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, NULL, EVENTHANDLER_PRI_ANY); /* initialize logging */ pmclog_initialize(); /* set hook functions */ pmc_intr = md->pmd_intr; wmb(); pmc_hook = pmc_hook_handler; if (error == 0) { printf(PMC_MODULE_NAME ":"); for (n = 0; n < md->pmd_nclass; n++) { if (md->pmd_classdep[n].pcd_num == 0) continue; pcd = &md->pmd_classdep[n]; printf(" %s/%d/%d/0x%b", pmc_name_of_pmcclass(pcd->pcd_class), pcd->pcd_num, pcd->pcd_width, pcd->pcd_caps, "\20" "\1INT\2USR\3SYS\4EDG\5THR" "\6REA\7WRI\10INV\11QUA\12PRC" "\13TAG\14CSC"); } printf("\n"); } return (error); } /* prepare to be unloaded */ static void pmc_cleanup(void) { struct pmc_binding pb; struct pmc_owner *po, *tmp; struct pmc_ownerhash *ph; struct pmc_processhash *prh __pmcdbg_used; u_int maxcpu; int cpu, c; PMCDBG0(MOD,INI,0, "cleanup"); /* switch off sampling */ CPU_FOREACH(cpu) DPCPU_ID_SET(cpu, pmc_sampled, 0); pmc_intr = NULL; sx_xlock(&pmc_sx); if (pmc_hook == NULL) { /* being unloaded already */ sx_xunlock(&pmc_sx); return; } pmc_hook = NULL; /* prevent new threads from entering module */ /* deregister event handlers */ EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); /* send SIGBUS to all owner threads, free up allocations */ if (pmc_ownerhash != NULL) { for (ph = pmc_ownerhash; ph <= &pmc_ownerhash[pmc_ownerhashmask]; ph++) { LIST_FOREACH_SAFE(po, ph, po_next, tmp) { pmc_remove_owner(po); PMCDBG3(MOD,INI,2, "cleanup signal proc=%p (%d, %s)", po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); PROC_LOCK(po->po_owner); kern_psignal(po->po_owner, SIGBUS); PROC_UNLOCK(po->po_owner); pmc_destroy_owner_descriptor(po); } } } /* reclaim allocated data structures */ taskqueue_drain(taskqueue_fast, &free_task); mtx_destroy(&pmc_threadfreelist_mtx); pmc_thread_descriptor_pool_drain(); if (pmc_mtxpool != NULL) mtx_pool_destroy(&pmc_mtxpool); mtx_destroy(&pmc_processhash_mtx); if (pmc_processhash != NULL) { #ifdef HWPMC_DEBUG struct pmc_process *pp; PMCDBG0(MOD,INI,3, "destroy process hash"); for (prh = pmc_processhash; prh <= &pmc_processhash[pmc_processhashmask]; prh++) LIST_FOREACH(pp, prh, pp_next) PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); #endif hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); pmc_processhash = NULL; } if (pmc_ownerhash != NULL) { PMCDBG0(MOD,INI,3, "destroy owner hash"); hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); pmc_ownerhash = NULL; } KASSERT(CK_LIST_EMPTY(&pmc_ss_owners), ("[pmc,%d] Global SS owner list not empty", __LINE__)); KASSERT(pmc_ss_count == 0, ("[pmc,%d] Global SS count not empty", __LINE__)); /* do processor and pmc-class dependent cleanup */ maxcpu = pmc_cpu_max(); PMCDBG0(MOD,INI,3, "md cleanup"); if (md) { pmc_save_cpu_binding(&pb); for (cpu = 0; cpu < maxcpu; cpu++) { PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", cpu, pmc_pcpu[cpu]); if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) continue; pmc_select_cpu(cpu); for (c = 0; c < md->pmd_nclass; c++) { if (md->pmd_classdep[c].pcd_num > 0) { md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); } } } if (md->pmd_cputype == PMC_CPU_GENERIC) pmc_generic_cpu_finalize(md); else pmc_md_finalize(md); pmc_mdep_free(md); md = NULL; pmc_restore_cpu_binding(&pb); } /* Free per-cpu descriptors. */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL, ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__, cpu)); free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC); free(pmc_pcpu[cpu], M_PMC); } free(pmc_pcpu, M_PMC); pmc_pcpu = NULL; free(pmc_pcpu_saved, M_PMC); pmc_pcpu_saved = NULL; if (pmc_pmcdisp != NULL) { free(pmc_pmcdisp, M_PMC); pmc_pmcdisp = NULL; } if (pmc_rowindex_to_classdep != NULL) { free(pmc_rowindex_to_classdep, M_PMC); pmc_rowindex_to_classdep = NULL; } pmclog_shutdown(); counter_u64_free(pmc_stats.pm_intr_ignored); counter_u64_free(pmc_stats.pm_intr_processed); counter_u64_free(pmc_stats.pm_intr_bufferfull); counter_u64_free(pmc_stats.pm_syscalls); counter_u64_free(pmc_stats.pm_syscall_errors); counter_u64_free(pmc_stats.pm_buffer_requests); counter_u64_free(pmc_stats.pm_buffer_requests_failed); counter_u64_free(pmc_stats.pm_log_sweeps); counter_u64_free(pmc_stats.pm_merges); counter_u64_free(pmc_stats.pm_overwrites); sx_xunlock(&pmc_sx); /* we are done */ } /* * The function called at load/unload. */ static int load(struct module *module __unused, int cmd, void *arg __unused) { int error; error = 0; switch (cmd) { case MOD_LOAD: /* initialize the subsystem */ error = pmc_initialize(); if (error != 0) break; PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d", pmc_syscall_num, pmc_cpu_max()); break; case MOD_UNLOAD: case MOD_SHUTDOWN: pmc_cleanup(); PMCDBG0(MOD,INI,1, "unloaded"); break; default: error = EINVAL; break; } return (error); } diff --git a/sys/dev/hwpmc/hwpmc_power8.c b/sys/dev/hwpmc/hwpmc_power8.c index fc6b878eff7d..d7ccbc5c6c0a 100644 --- a/sys/dev/hwpmc/hwpmc_power8.c +++ b/sys/dev/hwpmc/hwpmc_power8.c @@ -1,241 +1,244 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Justin Hibbits * Copyright (c) 2020 Leandro Lupori * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include "hwpmc_powerpc.h" #define POWER8_MAX_PMCS 6 #define PM_EVENT_CODE(pe) (pe & 0xffff) #define PM_EVENT_COUNTER(pe) ((pe >> 16) & 0xffff) #define PM_CYC 0x1e #define PM_INST_CMPL 0x02 static void power8_set_pmc(int cpu, int ri, int config) { register_t mmcr; /* Select event */ switch (ri) { case 0: case 1: case 2: case 3: mmcr = mfspr(SPR_MMCR1); mmcr &= ~SPR_MMCR1_P8_PMCNSEL_MASK(ri); mmcr |= SPR_MMCR1_P8_PMCNSEL(ri, config & ~POWERPC_PMC_ENABLE); mtspr(SPR_MMCR1, mmcr); break; } /* * By default, freeze counter in all states. * If counter is being started, unfreeze it in selected states. */ mmcr = mfspr(SPR_MMCR2) | SPR_MMCR2_FCNHSP(ri); if (config != PMCN_NONE) { if (config & POWERPC_PMC_USER_ENABLE) mmcr &= ~(SPR_MMCR2_FCNP0(ri) | SPR_MMCR2_FCNP1(ri)); if (config & POWERPC_PMC_KERNEL_ENABLE) mmcr &= ~(SPR_MMCR2_FCNH(ri) | SPR_MMCR2_FCNS(ri)); } mtspr(SPR_MMCR2, mmcr); } static int power8_pcpu_init(struct pmc_mdep *md, int cpu) { register_t mmcr0; int i; powerpc_pcpu_init(md, cpu); /* Freeze all counters before modifying PMC registers */ mmcr0 = mfspr(SPR_MMCR0) | SPR_MMCR0_FC; mtspr(SPR_MMCR0, mmcr0); /* * Now setup MMCR0: * - PMAO=0: clear alerts * - FCPC=0, FCP=0: don't freeze counters in problem state * - FCECE: Freeze Counters on Enabled Condition or Event * - PMC1CE/PMCNCE: PMC1/N Condition Enable */ mmcr0 &= ~(SPR_MMCR0_PMAO | SPR_MMCR0_FCPC | SPR_MMCR0_FCP); mmcr0 |= SPR_MMCR0_FCECE | SPR_MMCR0_PMC1CE | SPR_MMCR0_PMCNCE; mtspr(SPR_MMCR0, mmcr0); /* Clear all PMCs to prevent enabled condition interrupts */ for (i = 0; i < POWER8_MAX_PMCS; i++) powerpc_pmcn_write(i, 0); /* Disable events in PMCs 1-4 */ mtspr(SPR_MMCR1, mfspr(SPR_MMCR1) & ~SPR_MMCR1_P8_PMCSEL_ALL); /* Freeze each counter, in all states */ mtspr(SPR_MMCR2, mfspr(SPR_MMCR2) | SPR_MMCR2_FCNHSP(0) | SPR_MMCR2_FCNHSP(1) | SPR_MMCR2_FCNHSP(2) | SPR_MMCR2_FCNHSP(3) | SPR_MMCR2_FCNHSP(4) | SPR_MMCR2_FCNHSP(5)); /* Enable interrupts, unset global freeze */ mmcr0 &= ~SPR_MMCR0_FC; mmcr0 |= SPR_MMCR0_PMAE; mtspr(SPR_MMCR0, mmcr0); return (0); } static int power8_pcpu_fini(struct pmc_mdep *md, int cpu) { register_t mmcr0; /* Freeze counters, disable interrupts */ mmcr0 = mfspr(SPR_MMCR0); mmcr0 &= ~SPR_MMCR0_PMAE; mmcr0 |= SPR_MMCR0_FC; mtspr(SPR_MMCR0, mmcr0); return (powerpc_pcpu_fini(md, cpu)); } static void power8_resume_pmc(bool ie) { register_t mmcr0; /* Unfreeze counters and re-enable PERF exceptions if requested. */ mmcr0 = mfspr(SPR_MMCR0); mmcr0 &= ~(SPR_MMCR0_FC | SPR_MMCR0_PMAO | SPR_MMCR0_PMAE); if (ie) mmcr0 |= SPR_MMCR0_PMAE; mtspr(SPR_MMCR0, mmcr0); } static int power8_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { uint32_t caps, config, counter, pe; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[powerpc,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < ppc_max_pmcs, ("[powerpc,%d] illegal row index %d", __LINE__, ri)); pe = a->pm_md.pm_event; counter = PM_EVENT_COUNTER(pe); config = PM_EVENT_CODE(pe); if (a->pm_class != PMC_CLASS_POWER8) return (EINVAL); + if ((a->pm_flags & PMC_F_EV_PMU) == 0) + return (EINVAL); + /* * PMC5 and PMC6 are not programmable and always count instructions * completed and cycles, respectively. * * When counter is 0 any of the 4 programmable PMCs may be used for * the specified event, otherwise it must match ri + 1. */ if (counter == 0 && config == PM_INST_CMPL) counter = 5; else if (counter == 0 && config == PM_CYC) counter = 6; else if (counter > 4) return (EINVAL); if (counter != 0 && counter != ri + 1) return (EINVAL); caps = a->pm_caps; if (caps & PMC_CAP_SYSTEM) config |= POWERPC_PMC_KERNEL_ENABLE; if (caps & PMC_CAP_USER) config |= POWERPC_PMC_USER_ENABLE; if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0) config |= POWERPC_PMC_ENABLE; pm->pm_md.pm_powerpc.pm_powerpc_evsel = config; PMCDBG3(MDP,ALL,1,"powerpc-allocate cpu=%d ri=%d -> config=0x%x", cpu, ri, config); return (0); } int pmc_power8_initialize(struct pmc_mdep *pmc_mdep) { struct pmc_classdep *pcd; pmc_mdep->pmd_cputype = PMC_CPU_PPC_POWER8; pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC]; pcd->pcd_caps = POWERPC_PMC_CAPS; pcd->pcd_class = PMC_CLASS_POWER8; pcd->pcd_num = POWER8_MAX_PMCS; pcd->pcd_ri = pmc_mdep->pmd_npmc; pcd->pcd_width = 32; pcd->pcd_pcpu_init = power8_pcpu_init; pcd->pcd_pcpu_fini = power8_pcpu_fini; pcd->pcd_allocate_pmc = power8_allocate_pmc; pcd->pcd_release_pmc = powerpc_release_pmc; pcd->pcd_start_pmc = powerpc_start_pmc; pcd->pcd_stop_pmc = powerpc_stop_pmc; pcd->pcd_get_config = powerpc_get_config; pcd->pcd_config_pmc = powerpc_config_pmc; pcd->pcd_describe = powerpc_describe; pcd->pcd_read_pmc = powerpc_read_pmc; pcd->pcd_write_pmc = powerpc_write_pmc; pmc_mdep->pmd_npmc += POWER8_MAX_PMCS; pmc_mdep->pmd_intr = powerpc_pmc_intr; ppc_max_pmcs = POWER8_MAX_PMCS; powerpc_set_pmc = power8_set_pmc; powerpc_pmcn_read = powerpc_pmcn_read_default; powerpc_pmcn_write = powerpc_pmcn_write_default; powerpc_resume_pmc = power8_resume_pmc; return (0); } diff --git a/sys/dev/hwpmc/hwpmc_uncore.c b/sys/dev/hwpmc/hwpmc_uncore.c index c16800c14ce1..fd4266b605ef 100644 --- a/sys/dev/hwpmc/hwpmc_uncore.c +++ b/sys/dev/hwpmc/hwpmc_uncore.c @@ -1,757 +1,763 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2010 Fabien Thomas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Intel Uncore PMCs. */ #include #include #include #include #include #include #include #include #include #include #include #define UCF_PMC_CAPS \ (PMC_CAP_READ | PMC_CAP_WRITE) #define UCP_PMC_CAPS \ (PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \ PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE) #define SELECTSEL(x) \ (((x) == PMC_CPU_INTEL_SANDYBRIDGE || (x) == PMC_CPU_INTEL_HASWELL) ? \ UCP_CB0_EVSEL0 : UCP_EVSEL0) #define SELECTOFF(x) \ (((x) == PMC_CPU_INTEL_SANDYBRIDGE || (x) == PMC_CPU_INTEL_HASWELL) ? \ UCF_OFFSET_SB : UCF_OFFSET) static enum pmc_cputype uncore_cputype; struct uncore_cpu { volatile uint32_t pc_ucfctrl; /* Fixed function control. */ volatile uint64_t pc_globalctrl; /* Global control register. */ struct pmc_hw pc_uncorepmcs[]; }; static struct uncore_cpu **uncore_pcpu; static uint64_t uncore_pmcmask; static int uncore_ucf_ri; /* relative index of fixed counters */ static int uncore_ucf_width; static int uncore_ucf_npmc; static int uncore_ucp_width; static int uncore_ucp_npmc; static int uncore_pcpu_noop(struct pmc_mdep *md, int cpu) { (void) md; (void) cpu; return (0); } static int uncore_pcpu_init(struct pmc_mdep *md, int cpu) { struct pmc_cpu *pc; struct uncore_cpu *cc; struct pmc_hw *phw; int uncore_ri, n, npmc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[ucf,%d] insane cpu number %d", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"uncore-init cpu=%d", cpu); uncore_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCP].pcd_ri; npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCP].pcd_num; npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCF].pcd_num; cc = malloc(sizeof(struct uncore_cpu) + npmc * sizeof(struct pmc_hw), M_PMC, M_WAITOK | M_ZERO); uncore_pcpu[cpu] = cc; pc = pmc_pcpu[cpu]; KASSERT(pc != NULL && cc != NULL, ("[uncore,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu)); for (n = 0, phw = cc->pc_uncorepmcs; n < npmc; n++, phw++) { phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n + uncore_ri); phw->phw_pmc = NULL; pc->pc_hwpmcs[n + uncore_ri] = phw; } return (0); } static int uncore_pcpu_fini(struct pmc_mdep *md, int cpu) { int uncore_ri, n, npmc; struct pmc_cpu *pc; struct uncore_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] insane cpu number (%d)", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"uncore-pcpu-fini cpu=%d", cpu); if ((cc = uncore_pcpu[cpu]) == NULL) return (0); uncore_pcpu[cpu] = NULL; pc = pmc_pcpu[cpu]; KASSERT(pc != NULL, ("[uncore,%d] NULL per-cpu %d state", __LINE__, cpu)); npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCP].pcd_num; uncore_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCP].pcd_ri; for (n = 0; n < npmc; n++) wrmsr(SELECTSEL(uncore_cputype) + n, 0); wrmsr(UCF_CTRL, 0); npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCF].pcd_num; for (n = 0; n < npmc; n++) pc->pc_hwpmcs[n + uncore_ri] = NULL; free(cc, M_PMC); return (0); } /* * Fixed function counters. */ static pmc_value_t ucf_perfctr_value_to_reload_count(pmc_value_t v) { /* If the PMC has overflowed, return a reload count of zero. */ if ((v & (1ULL << (uncore_ucf_width - 1))) == 0) return (0); v &= (1ULL << uncore_ucf_width) - 1; return (1ULL << uncore_ucf_width) - v; } static pmc_value_t ucf_reload_count_to_perfctr_value(pmc_value_t rlc) { return (1ULL << uncore_ucf_width) - rlc; } static int ucf_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { uint32_t flags; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU %d", __LINE__, cpu)); PMCDBG2(MDP,ALL,1, "ucf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps); if (ri < 0 || ri > uncore_ucf_npmc) return (EINVAL); if (a->pm_class != PMC_CLASS_UCF) return (EINVAL); + if ((a->pm_flags & PMC_F_EV_PMU) == 0) + return (EINVAL); + flags = UCF_EN; pm->pm_md.pm_ucf.pm_ucf_ctrl = (flags << (ri * 4)); PMCDBG1(MDP,ALL,2, "ucf-allocate config=0x%jx", (uintmax_t) pm->pm_md.pm_ucf.pm_ucf_ctrl); return (0); } static int ucf_config_pmc(int cpu, int ri, struct pmc *pm) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucf_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); PMCDBG3(MDP,CFG,1, "ucf-config cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(uncore_pcpu[cpu] != NULL, ("[uncore,%d] null per-cpu %d", __LINE__, cpu)); uncore_pcpu[cpu]->pc_uncorepmcs[ri + uncore_ucf_ri].phw_pmc = pm; return (0); } static int ucf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { struct pmc_hw *phw; phw = &uncore_pcpu[cpu]->pc_uncorepmcs[ri + uncore_ucf_ri]; snprintf(pi->pm_name, sizeof(pi->pm_name), "UCF-%d", ri); pi->pm_class = PMC_CLASS_UCF; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int ucf_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = uncore_pcpu[cpu]->pc_uncorepmcs[ri + uncore_ucf_ri].phw_pmc; return (0); } static int ucf_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) { pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucf_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); tmp = rdmsr(UCF_CTR0 + ri); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) *v = ucf_perfctr_value_to_reload_count(tmp); else *v = tmp; PMCDBG3(MDP,REA,1, "ucf-read cpu=%d ri=%d -> v=%jx", cpu, ri, *v); return (0); } static int ucf_release_pmc(int cpu, int ri, struct pmc *pmc) { PMCDBG3(MDP,REL,1, "ucf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucf_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); KASSERT(uncore_pcpu[cpu]->pc_uncorepmcs[ri + uncore_ucf_ri].phw_pmc == NULL, ("[uncore,%d] PHW pmc non-NULL", __LINE__)); return (0); } static int ucf_start_pmc(int cpu, int ri, struct pmc *pm) { struct uncore_cpu *ucfc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucf_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); PMCDBG2(MDP,STA,1,"ucf-start cpu=%d ri=%d", cpu, ri); ucfc = uncore_pcpu[cpu]; ucfc->pc_ucfctrl |= pm->pm_md.pm_ucf.pm_ucf_ctrl; wrmsr(UCF_CTRL, ucfc->pc_ucfctrl); ucfc->pc_globalctrl |= (1ULL << (ri + SELECTOFF(uncore_cputype))); wrmsr(UC_GLOBAL_CTRL, ucfc->pc_globalctrl); PMCDBG4(MDP,STA,1,"ucfctrl=%x(%x) globalctrl=%jx(%jx)", ucfc->pc_ucfctrl, (uint32_t) rdmsr(UCF_CTRL), ucfc->pc_globalctrl, rdmsr(UC_GLOBAL_CTRL)); return (0); } static int ucf_stop_pmc(int cpu, int ri, struct pmc *pm __unused) { uint32_t fc; struct uncore_cpu *ucfc; PMCDBG2(MDP,STO,1,"ucf-stop cpu=%d ri=%d", cpu, ri); ucfc = uncore_pcpu[cpu]; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucf_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); fc = (UCF_MASK << (ri * 4)); ucfc->pc_ucfctrl &= ~fc; PMCDBG1(MDP,STO,1,"ucf-stop ucfctrl=%x", ucfc->pc_ucfctrl); wrmsr(UCF_CTRL, ucfc->pc_ucfctrl); /* Don't need to write UC_GLOBAL_CTRL, one disable is enough. */ PMCDBG4(MDP,STO,1,"ucfctrl=%x(%x) globalctrl=%jx(%jx)", ucfc->pc_ucfctrl, (uint32_t) rdmsr(UCF_CTRL), ucfc->pc_globalctrl, rdmsr(UC_GLOBAL_CTRL)); return (0); } static int ucf_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) { struct uncore_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucf_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); cc = uncore_pcpu[cpu]; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = ucf_reload_count_to_perfctr_value(v); wrmsr(UCF_CTRL, 0); /* Turn off fixed counters */ wrmsr(UCF_CTR0 + ri, v); wrmsr(UCF_CTRL, cc->pc_ucfctrl); PMCDBG4(MDP,WRI,1, "ucf-write cpu=%d ri=%d v=%jx ucfctrl=%jx ", cpu, ri, v, (uintmax_t) rdmsr(UCF_CTRL)); return (0); } static void ucf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth) { struct pmc_classdep *pcd; KASSERT(md != NULL, ("[ucf,%d] md is NULL", __LINE__)); PMCDBG0(MDP,INI,1, "ucf-initialize"); pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCF]; pcd->pcd_caps = UCF_PMC_CAPS; pcd->pcd_class = PMC_CLASS_UCF; pcd->pcd_num = npmc; pcd->pcd_ri = md->pmd_npmc; pcd->pcd_width = pmcwidth; pcd->pcd_allocate_pmc = ucf_allocate_pmc; pcd->pcd_config_pmc = ucf_config_pmc; pcd->pcd_describe = ucf_describe; pcd->pcd_get_config = ucf_get_config; pcd->pcd_get_msr = NULL; pcd->pcd_pcpu_fini = uncore_pcpu_noop; pcd->pcd_pcpu_init = uncore_pcpu_noop; pcd->pcd_read_pmc = ucf_read_pmc; pcd->pcd_release_pmc = ucf_release_pmc; pcd->pcd_start_pmc = ucf_start_pmc; pcd->pcd_stop_pmc = ucf_stop_pmc; pcd->pcd_write_pmc = ucf_write_pmc; md->pmd_npmc += npmc; } /* * Intel programmable PMCs. */ /* * Event descriptor tables. * * For each event id, we track: * * 1. The CPUs that the event is valid for. * * 2. If the event uses a fixed UMASK, the value of the umask field. * If the event doesn't use a fixed UMASK, a mask of legal bits * to check against. */ struct ucp_event_descr { enum pmc_event ucp_ev; unsigned char ucp_evcode; unsigned char ucp_umask; unsigned char ucp_flags; }; #define UCP_F_I7 (1 << 0) /* CPU: Core i7 */ #define UCP_F_WM (1 << 1) /* CPU: Westmere */ #define UCP_F_SB (1 << 2) /* CPU: Sandy Bridge */ #define UCP_F_HW (1 << 3) /* CPU: Haswell */ #define UCP_F_FM (1 << 4) /* Fixed mask */ #define UCP_F_ALLCPUS \ (UCP_F_I7 | UCP_F_WM) #define UCP_F_CMASK 0xFF000000 static pmc_value_t ucp_perfctr_value_to_reload_count(pmc_value_t v) { v &= (1ULL << uncore_ucp_width) - 1; return (1ULL << uncore_ucp_width) - v; } static pmc_value_t ucp_reload_count_to_perfctr_value(pmc_value_t rlc) { return (1ULL << uncore_ucp_width) - rlc; } /* * Counter specific event information for Sandybridge and Haswell */ static int ucp_event_sb_hw_ok_on_counter(uint8_t ev, int ri) { uint32_t mask; switch (ev) { /* * Events valid only on counter 0. */ case 0x80: case 0x83: mask = (1 << 0); break; default: mask = ~0; /* Any row index is ok. */ } return (mask & (1 << ri)); } static int ucp_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { uint8_t ev; const struct pmc_md_ucp_op_pmcallocate *ucp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row-index value %d", __LINE__, ri)); if (a->pm_class != PMC_CLASS_UCP) return (EINVAL); + if ((a->pm_flags & PMC_F_EV_PMU) == 0) + return (EINVAL); + ucp = &a->pm_md.pm_ucp; ev = UCP_EVSEL(ucp->pm_ucp_config); switch (uncore_cputype) { case PMC_CPU_INTEL_HASWELL: case PMC_CPU_INTEL_SANDYBRIDGE: if (ucp_event_sb_hw_ok_on_counter(ev, ri) == 0) return (EINVAL); break; default: break; } pm->pm_md.pm_ucp.pm_ucp_evsel = ucp->pm_ucp_config | UCP_EN; return (0); } static int ucp_config_pmc(int cpu, int ri, struct pmc *pm) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); PMCDBG3(MDP,CFG,1, "ucp-config cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(uncore_pcpu[cpu] != NULL, ("[uncore,%d] null per-cpu %d", __LINE__, cpu)); uncore_pcpu[cpu]->pc_uncorepmcs[ri].phw_pmc = pm; return (0); } static int ucp_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { struct pmc_hw *phw; phw = &uncore_pcpu[cpu]->pc_uncorepmcs[ri]; snprintf(pi->pm_name, sizeof(pi->pm_name), "UCP-%d", ri); pi->pm_class = PMC_CLASS_UCP; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int ucp_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = uncore_pcpu[cpu]->pc_uncorepmcs[ri].phw_pmc; return (0); } static int ucp_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v) { pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); tmp = rdmsr(UCP_PMC0 + ri); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) *v = ucp_perfctr_value_to_reload_count(tmp); else *v = tmp; PMCDBG4(MDP,REA,1, "ucp-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri, ri, *v); return (0); } static int ucp_release_pmc(int cpu, int ri, struct pmc *pm) { (void) pm; PMCDBG3(MDP,REL,1, "ucp-release cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); KASSERT(uncore_pcpu[cpu]->pc_uncorepmcs[ri].phw_pmc == NULL, ("[uncore,%d] PHW pmc non-NULL", __LINE__)); return (0); } static int ucp_start_pmc(int cpu, int ri, struct pmc *pm) { uint64_t evsel; struct uncore_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row-index %d", __LINE__, ri)); cc = uncore_pcpu[cpu]; PMCDBG2(MDP,STA,1, "ucp-start cpu=%d ri=%d", cpu, ri); evsel = pm->pm_md.pm_ucp.pm_ucp_evsel; PMCDBG4(MDP,STA,2, "ucp-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x", cpu, ri, SELECTSEL(uncore_cputype) + ri, evsel); wrmsr(SELECTSEL(uncore_cputype) + ri, evsel); cc->pc_globalctrl |= (1ULL << ri); wrmsr(UC_GLOBAL_CTRL, cc->pc_globalctrl); return (0); } static int ucp_stop_pmc(int cpu, int ri, struct pmc *pm __unused) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row index %d", __LINE__, ri)); PMCDBG2(MDP,STO,1, "ucp-stop cpu=%d ri=%d", cpu, ri); /* stop hw. */ wrmsr(SELECTSEL(uncore_cputype) + ri, 0); /* Don't need to write UC_GLOBAL_CTRL, one disable is enough. */ return (0); } static int ucp_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[uncore,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < uncore_ucp_npmc, ("[uncore,%d] illegal row index %d", __LINE__, ri)); PMCDBG4(MDP,WRI,1, "ucp-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri, UCP_PMC0 + ri, v); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = ucp_reload_count_to_perfctr_value(v); /* * Write the new value to the counter. The counter will be in * a stopped state when the pcd_write() entry point is called. */ wrmsr(UCP_PMC0 + ri, v); return (0); } static void ucp_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth) { struct pmc_classdep *pcd; KASSERT(md != NULL, ("[ucp,%d] md is NULL", __LINE__)); PMCDBG0(MDP,INI,1, "ucp-initialize"); pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_UCP]; pcd->pcd_caps = UCP_PMC_CAPS; pcd->pcd_class = PMC_CLASS_UCP; pcd->pcd_num = npmc; pcd->pcd_ri = md->pmd_npmc; pcd->pcd_width = pmcwidth; pcd->pcd_allocate_pmc = ucp_allocate_pmc; pcd->pcd_config_pmc = ucp_config_pmc; pcd->pcd_describe = ucp_describe; pcd->pcd_get_config = ucp_get_config; pcd->pcd_get_msr = NULL; pcd->pcd_pcpu_fini = uncore_pcpu_fini; pcd->pcd_pcpu_init = uncore_pcpu_init; pcd->pcd_read_pmc = ucp_read_pmc; pcd->pcd_release_pmc = ucp_release_pmc; pcd->pcd_start_pmc = ucp_start_pmc; pcd->pcd_stop_pmc = ucp_stop_pmc; pcd->pcd_write_pmc = ucp_write_pmc; md->pmd_npmc += npmc; } int pmc_uncore_initialize(struct pmc_mdep *md, int maxcpu) { uncore_cputype = md->pmd_cputype; uncore_pmcmask = 0; /* * Initialize programmable counters. */ uncore_ucp_npmc = 8; uncore_ucp_width = 48; uncore_pmcmask |= ((1ULL << uncore_ucp_npmc) - 1); ucp_initialize(md, maxcpu, uncore_ucp_npmc, uncore_ucp_width); /* * Initialize fixed function counters, if present. */ uncore_ucf_ri = uncore_ucp_npmc; uncore_ucf_npmc = 1; uncore_ucf_width = 48; ucf_initialize(md, maxcpu, uncore_ucf_npmc, uncore_ucf_width); uncore_pmcmask |= ((1ULL << uncore_ucf_npmc) - 1) << SELECTOFF(uncore_cputype); PMCDBG2(MDP,INI,1,"uncore-init pmcmask=0x%jx ucfri=%d", uncore_pmcmask, uncore_ucf_ri); uncore_pcpu = malloc(sizeof(*uncore_pcpu) * maxcpu, M_PMC, M_ZERO | M_WAITOK); return (0); } void pmc_uncore_finalize(struct pmc_mdep *md) { PMCDBG0(MDP,INI,1, "uncore-finalize"); free(uncore_pcpu, M_PMC); uncore_pcpu = NULL; } diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h index a4d03efb6aac..714d8a7c65b7 100644 --- a/sys/sys/pmc.h +++ b/sys/sys/pmc.h @@ -1,1232 +1,1240 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #include #include #ifdef _KERNEL #include #include #endif #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ #define PMC_VERSION_MAJOR 0x0A #define PMC_VERSION_MINOR 0x00 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) #define PMC_CPUID_LEN 64 /* cpu model name for pmu lookup */ extern char pmc_cpuid[PMC_CPUID_LEN]; /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. * * Please keep the pmc(3) manual page in sync with this list. */ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \ __PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \ __PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \ __PMC_CPU(INTEL_ATOM_GOLDMONT, 0x9A, "Intel Atom Goldmont") \ __PMC_CPU(INTEL_ICELAKE, 0x9B, "Intel Icelake") \ __PMC_CPU(INTEL_ICELAKE_XEON, 0x9C, "Intel Icelake Xeon") \ __PMC_CPU(INTEL_ALDERLAKE, 0x9D, "Intel Alderlake") \ __PMC_CPU(INTEL_ATOM_GOLDMONT_P, 0x9E, "Intel Atom Goldmont Plus") \ __PMC_CPU(INTEL_ATOM_TREMONT, 0x9F, "Intel Atom Tremont") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(PPC_POWER8, 0x390, "IBM POWER8") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ __PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") \ __PMC_CPU(ARMV8_CORTEX_A76, 0x602, "ARMv8 Cortex A76") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_ARMV8_CORTEX_A76 /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K7, 0x01, "AMD K7 performance counters") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(E500, 0x13, "Freescale e500 class") \ __PMC_CLASS(POWER8, 0x15, "IBM POWER8 class") \ __PMC_CLASS(DMC620_PMU_CD2, 0x16, "ARM DMC620 Memory Controller PMU CLKDIV2") \ __PMC_CLASS(DMC620_PMU_C, 0x17, "ARM DMC620 Memory Controller PMU CLK") \ __PMC_CLASS(CMN600_PMU, 0x18, "Arm CoreLink CMN600 Coherent Mesh Network PMU") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_CMN600_PMU /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * a interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system. */ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparison sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") \ __PMC_CAP(SYSWIDE, 12, "system wide counter") \ __PMC_CAP(DOMWIDE, 13, "NUMA domain wide counter") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_DOMWIDE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. */ #define PMC_F_UNUSED1 0x00000001 /* unused */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ #define PMC_F_USERCALLCHAIN 0x00000100 /*OP ALLOCATE use userspace stack */ +/* V10 API */ +#define PMC_F_EV_PMU 0x00000200 /* + * OP ALLOCATE: pm_ev has special + * userspace meaning; counter + * configuration is communicated + * through class-dependent fields + */ + /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +-----------------------+-------+-----------+ * | CPU | PMC MODE | CLASS | ROW INDEX | * +-----------------------+-------+-----------+ * * where CPU is 12 bits, MODE 4, CLASS 8, and ROW INDEX 8 Field 'CPU' * is set to the requested CPU for system-wide PMCs or PMC_CPU_ANY for * process-mode PMCs. Field 'PMC MODE' is the allocated PMC mode. * Field 'PMC CLASS' is the class of the PMC. Field 'ROW INDEX' is the * row index for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xFF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xF0000) >> 16) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFFF00000) >> 20) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFFF) << 20) | (((MODE) & 0xF) << 16) | \ (((CLASS) & 0xFF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ pmc_value_t pm_count; /* initial/sample count */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ #ifdef _KERNEL struct pmc_driverstats { counter_u64_t pm_intr_ignored; /* #interrupts ignored */ counter_u64_t pm_intr_processed; /* #interrupts processed */ counter_u64_t pm_intr_bufferfull; /* #interrupts with ENOSPC */ counter_u64_t pm_syscalls; /* #syscalls */ counter_u64_t pm_syscall_errors; /* #syscalls with errors */ counter_u64_t pm_buffer_requests; /* #buffer requests */ counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */ counter_u64_t pm_log_sweeps; /* #sample buffer processing passes */ counter_u64_t pm_merges; /* merged k+u */ counter_u64_t pm_overwrites; /* UR overwrites */ }; #endif struct pmc_op_getdriverstats { unsigned int pm_intr_ignored; /* #interrupts ignored */ unsigned int pm_intr_processed; /* #interrupts processed */ unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ unsigned int pm_syscalls; /* #syscalls */ unsigned int pm_syscall_errors; /* #syscalls with errors */ unsigned int pm_buffer_requests; /* #buffer requests */ unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ unsigned int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address associated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. */ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 256 #define PMC_NLOGBUFFERS_PCPU 32 #define PMC_NSAMPLES 256 #define PMC_CALLCHAIN_DEPTH 128 #define PMC_THREADLIST_MAX 128 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) * (g) - global_epoch_preempt (epoch) * (p) - pmc_sx (sx) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific s1tuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * get descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc_pcpu_state { uint32_t pps_overflowcnt; /* count overflow interrupts */ uint8_t pps_stalled; uint8_t pps_cpustate; } __aligned(CACHE_LINE_SIZE); struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; struct pmc_pcpu_state *pm_pcpu_state; volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ counter_u64_t pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC. */ pmc_id_t pm_id; /* allocated PMC id */ enum pmc_class pm_class; /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_threadpmcstate * * Record per-PMC, per-thread state. */ struct pmc_threadpmcstate { pmc_value_t pt_pmcval; /* per-thread reload count */ }; /* * struct pmc_thread * * Record a 'target' thread being profiled. */ struct pmc_thread { LIST_ENTRY(pmc_thread) pt_next; /* linked list */ struct thread *pt_td; /* target thread */ struct pmc_threadpmcstate pt_pmcs[]; /* per-PMC state */ }; /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ LIST_HEAD(,pmc_thread) pp_tds; /* list of threads */ struct mtx *pp_tdslock; /* lock on pp_tds thread list */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target process */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ CK_LIST_ENTRY(pmc_owner) po_ssnext; /* (g/p) list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ struct pmclog_buffer *po_curbuf[MAXCPU]; /* current log buffer */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). * */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint16_t ps_nsamples_actual; uint16_t ps_cpu; /* cpu number */ uint16_t ps_flags; /* other flags */ lwpid_t ps_tid; /* thread id */ pid_t ps_pid; /* process PID or -1 */ int ps_ticks; /* ticks at sample time */ /* pad */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ uint64_t ps_tsc; /* tsc value */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_USER_CALLCHAIN_PENDING ((uint16_t) 0xFFFF) struct pmc_samplebuffer { volatile uint64_t ps_prodidx; /* producer index */ volatile uint64_t ps_considx; /* consumer index */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; #define PMC_CONS_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_considx & pmc_sample_mask]) #define PMC_CONS_SAMPLE_OFF(psb, off) \ (&(psb)->ps_samples[(off) & pmc_sample_mask]) #define PMC_PROD_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask]) /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[3]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ u_char pb_priority; /* Thread active priority. */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. */ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, struct pmc *_pm, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, struct pmc *_pm, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_stop_pmc)(int _cpu, int _ri, struct pmc *_pm); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. */ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_driverstats pmc_stats; #if defined(HWPMC_DEBUG) /* HWPMC_DEBUG without KTR will compile but is a no-op. */ #if !defined(KTR) || !defined(KTR_COMPILE) || ((KTR_COMPILE & KTR_SUBSYS) == 0) #error "HWPMC_DEBUG requires KTR and KTR_COMPILE=KTR_SUBSYS -- see ktr(4)" #endif #include #define __pmcdbg_used /* unused variable annotation */ /* * Debug flags, major flag groups. * * Please keep the DEBUGGING section of the hwpmc(4) man page in sync. */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... */ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define __pmcdbg_used __unused #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _ring, struct pmc *_pm, struct trapframe *_tf); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); void pmc_restore_cpu_binding(struct pmc_binding *pb); void pmc_save_cpu_binding(struct pmc_binding *pb); void pmc_select_cpu(int cpu); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); uint64_t pmc_rdtsc(void); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */