Index: head/lib/libpmc/libpmc_pmu_util.c =================================================================== --- head/lib/libpmc/libpmc_pmu_util.c (revision 334595) +++ head/lib/libpmc/libpmc_pmu_util.c (revision 334596) @@ -1,479 +1,467 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include #include "pmu-events/pmu-events.h" #if defined(__amd64__) || defined(__i386__) struct pmu_alias { const char *pa_alias; const char *pa_name; }; static struct pmu_alias pmu_alias_table[] = { {"UNHALTED_CORE_CYCLES", "CPU_CLK_UNHALTED.THREAD_P_ANY"}, {"UNHALTED-CORE-CYCLES", "CPU_CLK_UNHALTED.THREAD_P_ANY"}, {"LLC_MISSES", "LONGEST_LAT_CACHE.MISS"}, {"LLC-MISSES", "LONGEST_LAT_CACHE.MISS"}, {"LLC_REFERENCE", "LONGEST_LAT_CACHE.REFERENCE"}, {"LLC-REFERENCE", "LONGEST_LAT_CACHE.REFERENCE"}, {"LLC_MISS_RHITM", "mem_load_l3_miss_retired.remote_hitm"}, {"LLC-MISS-RHITM", "mem_load_l3_miss_retired.remote_hitm"}, {"RESOURCE_STALL", "RESOURCE_STALLS.ANY"}, {"RESOURCE_STALLS_ANY", "RESOURCE_STALLS.ANY"}, {"BRANCH_INSTRUCTION_RETIRED", "BR_INST_RETIRED.ALL_BRANCHES"}, {"BRANCH-INSTRUCTION-RETIRED", "BR_INST_RETIRED.ALL_BRANCHES"}, {"BRANCH_MISSES_RETIRED", "BR_MISP_RETIRED.ALL_BRANCHES"}, {"BRANCH-MISSES-RETIRED", "BR_MISP_RETIRED.ALL_BRANCHES"}, {"cycles", "tsc-tsc"}, {"instructions", "inst-retired.any_p"}, {"branch-mispredicts", "br_misp_retired.all_branches" }, {"branches", "br_inst_retired.all_branches" }, {"interrupts", "hw_interrupts.received"}, {"ic-misses", "frontend_retired.l1i_miss"}, {NULL, NULL}, }; -static const char *fixed_mode_cntrs[] = { - "inst_retired.any", - "cpu_clk_unhalted.thread", - "cpu_clk_unhalted.thread_any", - "cpu_clk_unhalted.ref_tsc", - NULL -}; +/* + * The Intel fixed mode counters are: + * "inst_retired.any", + * "cpu_clk_unhalted.thread", + * "cpu_clk_unhalted.thread_any", + * "cpu_clk_unhalted.ref_tsc", + * + */ static const char * pmu_alias_get(const char *name) { struct pmu_alias *pa; for (pa = pmu_alias_table; pa->pa_alias != NULL; pa++) if (strcasecmp(name, pa->pa_alias) == 0) return (pa->pa_name); return (name); } struct pmu_event_desc { uint64_t ped_period; uint64_t ped_offcore_rsp; uint32_t ped_event; uint32_t ped_frontend; uint32_t ped_ldlat; uint32_t ped_config1; - uint8_t ped_umask; + int16_t ped_umask; uint8_t ped_cmask; uint8_t ped_any; uint8_t ped_inv; uint8_t ped_edge; uint8_t ped_fc_mask; uint8_t ped_ch_mask; }; static const struct pmu_events_map * pmu_events_map_get(void) { size_t s; char buf[64]; const struct pmu_events_map *pme; if (sysctlbyname("kern.hwpmc.cpuid", (void *)NULL, &s, (void *)NULL, 0) == -1) return (NULL); if (sysctlbyname("kern.hwpmc.cpuid", buf, &s, (void *)NULL, 0) == -1) return (NULL); for (pme = pmu_events_map; pme->cpuid != NULL; pme++) if (strcmp(buf, pme->cpuid) == 0) return (pme); return (NULL); } static const struct pmu_event * pmu_event_get(const char *event_name, int *idx) { const struct pmu_events_map *pme; const struct pmu_event *pe; int i; if ((pme = pmu_events_map_get()) == NULL) return (NULL); for (i = 0, pe = pme->table; pe->name || pe->desc || pe->event; pe++, i++) { if (pe->name == NULL) continue; if (strcasecmp(pe->name, event_name) == 0) { if (idx) *idx = i; return (pe); } } return (NULL); } const char * pmc_pmu_event_get_by_idx(int idx) { const struct pmu_events_map *pme; const struct pmu_event *pe; int i; if ((pme = pmu_events_map_get()) == NULL) return (NULL); for (i = 0, pe = pme->table; (pe->name || pe->desc || pe->event) && i < idx; pe++, i++); return (pe->name); } static int pmu_parse_event(struct pmu_event_desc *ped, const char *eventin) { char *event; char *kvp, *key, *value, *r; char *debug; if ((event = strdup(eventin)) == NULL) return (ENOMEM); r = event; bzero(ped, sizeof(*ped)); + ped->ped_umask = -1; while ((kvp = strsep(&event, ",")) != NULL) { key = strsep(&kvp, "="); if (key == NULL) abort(); value = kvp; if (strcmp(key, "umask") == 0) ped->ped_umask = strtol(value, NULL, 16); else if (strcmp(key, "event") == 0) ped->ped_event = strtol(value, NULL, 16); else if (strcmp(key, "period") == 0) ped->ped_period = strtol(value, NULL, 10); else if (strcmp(key, "offcore_rsp") == 0) ped->ped_offcore_rsp = strtol(value, NULL, 16); else if (strcmp(key, "any") == 0) ped->ped_any = strtol(value, NULL, 10); else if (strcmp(key, "cmask") == 0) ped->ped_cmask = strtol(value, NULL, 10); else if (strcmp(key, "inv") == 0) ped->ped_inv = strtol(value, NULL, 10); else if (strcmp(key, "edge") == 0) ped->ped_edge = strtol(value, NULL, 10); else if (strcmp(key, "frontend") == 0) ped->ped_frontend = strtol(value, NULL, 16); else if (strcmp(key, "ldlat") == 0) ped->ped_ldlat = strtol(value, NULL, 16); else if (strcmp(key, "fc_mask") == 0) ped->ped_fc_mask = strtol(value, NULL, 16); else if (strcmp(key, "ch_mask") == 0) ped->ped_ch_mask = strtol(value, NULL, 16); else if (strcmp(key, "config1") == 0) ped->ped_config1 = strtol(value, NULL, 16); else { debug = getenv("PMUDEBUG"); if (debug != NULL && strcmp(debug, "true") == 0 && value != NULL) printf("unrecognized kvpair: %s:%s\n", key, value); } } free(r); return (0); } uint64_t pmc_pmu_sample_rate_get(const char *event_name) { const struct pmu_event *pe; struct pmu_event_desc ped; event_name = pmu_alias_get(event_name); if ((pe = pmu_event_get(event_name, NULL)) == NULL) return (DEFAULT_SAMPLE_COUNT); if (pe->alias && (pe = pmu_event_get(pe->alias, NULL)) == NULL) return (DEFAULT_SAMPLE_COUNT); if (pe->event == NULL) return (DEFAULT_SAMPLE_COUNT); if (pmu_parse_event(&ped, pe->event)) return (DEFAULT_SAMPLE_COUNT); return (ped.ped_period); } int pmc_pmu_enabled(void) { return (pmu_events_map_get() != NULL); } void pmc_pmu_print_counters(const char *event_name) { const struct pmu_events_map *pme; const struct pmu_event *pe; struct pmu_event_desc ped; char *debug; int do_debug; debug = getenv("PMUDEBUG"); do_debug = 0; if (debug != NULL && strcmp(debug, "true") == 0) do_debug = 1; if ((pme = pmu_events_map_get()) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (event_name != NULL && strcasestr(pe->name, event_name) == NULL) continue; printf("\t%s\n", pe->name); if (do_debug) pmu_parse_event(&ped, pe->event); } } void pmc_pmu_print_counter_desc(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get()) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) != NULL && pe->desc != NULL) printf("%s:\t%s\n", pe->name, pe->desc); } } void pmc_pmu_print_counter_desc_long(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get()) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) != NULL) { if (pe->long_desc != NULL) printf("%s:\n%s\n", pe->name, pe->long_desc); else if (pe->desc != NULL) printf("%s:\t%s\n", pe->name, pe->desc); } } } void pmc_pmu_print_counter_full(const char *ev) { const struct pmu_events_map *pme; const struct pmu_event *pe; if ((pme = pmu_events_map_get()) == NULL) return; for (pe = pme->table; pe->name || pe->desc || pe->event; pe++) { if (pe->name == NULL) continue; if (strcasestr(pe->name, ev) == NULL) continue; printf("name: %s\n", pe->name); if (pe->long_desc != NULL) printf("desc: %s\n", pe->long_desc); else if (pe->desc != NULL) printf("desc: %s\n", pe->desc); if (pe->event != NULL) printf("event: %s\n", pe->event); if (pe->topic != NULL) printf("topic: %s\n", pe->topic); if (pe->pmu != NULL) printf("pmu: %s\n", pe->pmu); if (pe->unit != NULL) printf("unit: %s\n", pe->unit); if (pe->perpkg != NULL) printf("perpkg: %s\n", pe->perpkg); if (pe->metric_expr != NULL) printf("metric_expr: %s\n", pe->metric_expr); if (pe->metric_name != NULL) printf("metric_name: %s\n", pe->metric_name); if (pe->metric_group != NULL) printf("metric_group: %s\n", pe->metric_group); } } int pmc_pmu_pmcallocate(const char *event_name, struct pmc_op_pmcallocate *pm) { const struct pmu_event *pe; struct pmu_event_desc ped; struct pmc_md_iap_op_pmcallocate *iap; - struct pmc_md_iaf_op_pmcallocate *iaf; int idx, isfixed; iap = &pm->pm_md.pm_iap; isfixed = 0; bzero(iap, sizeof(*iap)); event_name = pmu_alias_get(event_name); pm->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); if ((pe = pmu_event_get(event_name, &idx)) == NULL) return (ENOENT); if (pe->alias && (pe = pmu_event_get(pe->alias, &idx)) == NULL) return (ENOENT); if (pe->event == NULL) return (ENOENT); if (pmu_parse_event(&ped, pe->event)) return (ENOENT); - for (idx = 0; fixed_mode_cntrs[idx] != NULL; idx++) - if (strcmp(fixed_mode_cntrs[idx], event_name) == 0) - isfixed = 1; - if (isfixed) { - iaf = &pm->pm_md.pm_iaf; - pm->pm_class = PMC_CLASS_IAF; - if (strcasestr(pe->desc, "retired") != NULL) - pm->pm_ev = PMC_EV_IAF_INSTR_RETIRED_ANY; - else if (strcasestr(pe->desc, "core") != NULL || - strcasestr(pe->desc, "unhalted")) - pm->pm_ev = PMC_EV_IAF_CPU_CLK_UNHALTED_CORE; - else if (strcasestr(pe->desc, "ref") != NULL) - pm->pm_ev = PMC_EV_IAF_CPU_CLK_UNHALTED_REF; - iaf->pm_iaf_flags |= (IAF_USR | IAF_OS); - if (ped.ped_any) - iaf->pm_iaf_flags |= IAF_ANY; - if (pm->pm_caps & PMC_CAP_INTERRUPT) - iaf->pm_iaf_flags |= IAF_PMI; - return (0); - } else if (strcasestr(event_name, "UNC_") == event_name || - strcasestr(event_name, "uncore") != NULL) { + + if (strcasestr(event_name, "UNC_") == event_name || + strcasestr(event_name, "uncore") != NULL) { pm->pm_class = PMC_CLASS_UCP; - } else { pm->pm_caps |= PMC_CAP_QUALIFIER; + } else if ((ped.ped_umask == -1) || + (ped.ped_event == 0x0 && ped.ped_umask == 0x3)) { + pm->pm_class = PMC_CLASS_IAF; + } else { pm->pm_class = PMC_CLASS_IAP; + pm->pm_caps |= PMC_CAP_QUALIFIER; } pm->pm_ev = idx; iap->pm_iap_config |= IAP_EVSEL(ped.ped_event); - iap->pm_iap_config |= IAP_UMASK(ped.ped_umask); + if (ped.ped_umask > 0) + iap->pm_iap_config |= IAP_UMASK(ped.ped_umask); iap->pm_iap_config |= IAP_CMASK(ped.ped_cmask); iap->pm_iap_rsp = ped.ped_offcore_rsp; iap->pm_iap_config |= (IAP_USR | IAP_OS); if (ped.ped_edge) iap->pm_iap_config |= IAP_EDGE; if (ped.ped_any) iap->pm_iap_config |= IAP_ANY; if (ped.ped_inv) iap->pm_iap_config |= IAP_EDGE; if (pm->pm_caps & PMC_CAP_INTERRUPT) iap->pm_iap_config |= IAP_INT; return (0); } /* * Ultimately rely on AMD calling theirs the same */ static const char *stat_mode_cntrs[] = { "cpu_clk_unhalted.thread_any", "inst_retired.any", "br_inst_retired.all_branches", "br_misp_retired.all_branches", "longest_lat_cache.reference", "longest_lat_cache.miss", }; int pmc_pmu_stat_mode(const char ***cntrs) { if (pmc_pmu_enabled()) { *cntrs = stat_mode_cntrs; return (0); } return (EOPNOTSUPP); } #else uint64_t pmc_pmu_sample_rate_get(const char *event_name __unused) { return (DEFAULT_SAMPLE_COUNT); } void pmc_pmu_print_counters(const char *event_name __unused) { } void pmc_pmu_print_counter_desc(const char *e __unused) { } void pmc_pmu_print_counter_desc_long(const char *e __unused) { } void pmc_pmu_print_counter_full(const char *e __unused) { } int pmc_pmu_enabled(void) { return (0); } int pmc_pmu_pmcallocate(const char *e __unused, struct pmc_op_pmcallocate *p __unused) { return (EOPNOTSUPP); } const char * pmc_pmu_event_get_by_idx(int idx __unused) { return (NULL); } int pmc_pmu_stat_mode(const char ***a __unused) { return (EOPNOTSUPP); } #endif Index: head/lib/libpmc/pmclog.c =================================================================== --- head/lib/libpmc/pmclog.c (revision 334595) +++ head/lib/libpmc/pmclog.c (revision 334596) @@ -1,595 +1,597 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpmcinternal.h" #define PMCLOG_BUFFER_SIZE 4096 /* * API NOTES * * The pmclog(3) API is oriented towards parsing an event stream in * "realtime", i.e., from an data source that may or may not preserve * record boundaries -- for example when the data source is elsewhere * on a network. The API allows data to be fed into the parser zero * or more bytes at a time. * * The state for a log file parser is maintained in a 'struct * pmclog_parse_state'. Parser invocations are done by calling * 'pmclog_read()'; this function will inform the caller when a * complete event is parsed. * * The parser first assembles a complete log file event in an internal * work area (see "ps_saved" below). Once a complete log file event * is read, the parser then parses it and converts it to an event * descriptor usable by the client. We could possibly avoid this two * step process by directly parsing the input log to set fields in the * event record. However the parser's state machine would get * insanely complicated, and this code is unlikely to be used in * performance critical paths. */ enum pmclog_parser_state { PL_STATE_NEW_RECORD, /* in-between records */ PL_STATE_EXPECTING_HEADER, /* header being read */ PL_STATE_PARTIAL_RECORD, /* header present but not the record */ PL_STATE_ERROR /* parsing error encountered */ }; struct pmclog_parse_state { enum pmclog_parser_state ps_state; enum pmc_cputype ps_arch; /* log file architecture */ uint32_t ps_version; /* hwpmc version */ int ps_initialized; /* whether initialized */ int ps_count; /* count of records processed */ off_t ps_offset; /* stream byte offset */ union pmclog_entry ps_saved; /* saved partial log entry */ int ps_svcount; /* #bytes saved */ int ps_fd; /* active fd or -1 */ char *ps_buffer; /* scratch buffer if fd != -1 */ char *ps_data; /* current parse pointer */ size_t ps_len; /* length of buffered data */ }; #define PMCLOG_HEADER_FROM_SAVED_STATE(PS) \ (* ((uint32_t *) &(PS)->ps_saved)) #define PMCLOG_INITIALIZE_READER(LE,A) LE = (uint32_t *) &(A) #define PMCLOG_READ32(LE,V) do { \ (V) = *(LE)++; \ } while (0) #define PMCLOG_READ64(LE,V) do { \ uint64_t _v; \ _v = (uint64_t) *(LE)++; \ _v |= ((uint64_t) *(LE)++) << 32; \ (V) = _v; \ } while (0) #define PMCLOG_READSTRING(LE,DST,LEN) strlcpy((DST), (char *) (LE), (LEN)) /* * Assemble a log record from '*len' octets starting from address '*data'. * Update 'data' and 'len' to reflect the number of bytes consumed. * * '*data' is potentially an unaligned address and '*len' octets may * not be enough to complete a event record. */ static enum pmclog_parser_state pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len) { int avail, copylen, recordsize, used; uint32_t h; const int HEADERSIZE = sizeof(uint32_t); char *src, *dst; if ((avail = *len) <= 0) return (ps->ps_state = PL_STATE_ERROR); src = *data; used = 0; if (ps->ps_state == PL_STATE_NEW_RECORD) ps->ps_svcount = 0; dst = (char *) &ps->ps_saved + ps->ps_svcount; switch (ps->ps_state) { case PL_STATE_NEW_RECORD: /* * Transitions: * * Case A: avail < headersize * -> 'expecting header' * * Case B: avail >= headersize * B.1: avail < recordsize * -> 'partial record' * B.2: avail >= recordsize * -> 'new record' */ copylen = avail < HEADERSIZE ? avail : HEADERSIZE; bcopy(src, dst, copylen); ps->ps_svcount = used = copylen; if (copylen < HEADERSIZE) { ps->ps_state = PL_STATE_EXPECTING_HEADER; goto done; } src += copylen; dst += copylen; h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); recordsize = PMCLOG_HEADER_TO_LENGTH(h); if (recordsize <= 0) goto error; if (recordsize <= avail) { /* full record available */ bcopy(src, dst, recordsize - copylen); ps->ps_svcount = used = recordsize; goto done; } /* header + a partial record is available */ bcopy(src, dst, avail - copylen); ps->ps_svcount = used = avail; ps->ps_state = PL_STATE_PARTIAL_RECORD; break; case PL_STATE_EXPECTING_HEADER: /* * Transitions: * * Case C: avail+saved < headersize * -> 'expecting header' * * Case D: avail+saved >= headersize * D.1: avail+saved < recordsize * -> 'partial record' * D.2: avail+saved >= recordsize * -> 'new record' * (see PARTIAL_RECORD handling below) */ if (avail + ps->ps_svcount < HEADERSIZE) { bcopy(src, dst, avail); ps->ps_svcount += avail; used = avail; break; } used = copylen = HEADERSIZE - ps->ps_svcount; bcopy(src, dst, copylen); src += copylen; dst += copylen; avail -= copylen; ps->ps_svcount += copylen; /*FALLTHROUGH*/ case PL_STATE_PARTIAL_RECORD: /* * Transitions: * * Case E: avail+saved < recordsize * -> 'partial record' * * Case F: avail+saved >= recordsize * -> 'new record' */ h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); recordsize = PMCLOG_HEADER_TO_LENGTH(h); if (recordsize <= 0) goto error; if (avail + ps->ps_svcount < recordsize) { copylen = avail; ps->ps_state = PL_STATE_PARTIAL_RECORD; } else { copylen = recordsize - ps->ps_svcount; ps->ps_state = PL_STATE_NEW_RECORD; } bcopy(src, dst, copylen); ps->ps_svcount += copylen; used += copylen; break; default: goto error; } done: *data += used; *len -= used; return ps->ps_state; error: ps->ps_state = PL_STATE_ERROR; return ps->ps_state; } /* * Get an event from the stream pointed to by '*data'. '*len' * indicates the number of bytes available to parse. Arguments * '*data' and '*len' are updated to indicate the number of bytes * consumed. */ static int pmclog_get_event(void *cookie, char **data, ssize_t *len, struct pmclog_ev *ev) { int evlen, pathlen; uint32_t h, *le, npc, noop; enum pmclog_parser_state e; struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; assert(ps->ps_state != PL_STATE_ERROR); if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) { ev->pl_state = PMCLOG_ERROR; printf("state error\n"); return -1; } if (e != PL_STATE_NEW_RECORD) { ev->pl_state = PMCLOG_REQUIRE_DATA; return -1; } PMCLOG_INITIALIZE_READER(le, ps->ps_saved); PMCLOG_READ32(le,h); if (!PMCLOG_HEADER_CHECK_MAGIC(h)) { printf("bad magic\n"); ps->ps_state = PL_STATE_ERROR; ev->pl_state = PMCLOG_ERROR; return -1; } /* copy out the time stamp */ PMCLOG_READ32(le,ev->pl_ts.tv_sec); PMCLOG_READ32(le,ev->pl_ts.tv_nsec); evlen = PMCLOG_HEADER_TO_LENGTH(h); #define PMCLOG_GET_PATHLEN(P,E,TYPE) do { \ (P) = (E) - offsetof(struct TYPE, pl_pathname); \ if ((P) > PATH_MAX || (P) < 0) \ goto error; \ } while (0) #define PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do { \ (SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc)) \ / sizeof(uintfptr_t); \ } while (0); switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) { case PMCLOG_TYPE_CALLCHAIN: PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_tid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags); PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags2); PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen); for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++) PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]); for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++) ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0; break; case PMCLOG_TYPE_CLOSELOG: ev->pl_state = PMCLOG_EOF; return (-1); case PMCLOG_TYPE_DROPNOTIFY: /* nothing to do */ break; case PMCLOG_TYPE_INITIALIZE: PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version); PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch); + PMCLOG_READ32(le, noop); + PMCLOG_READSTRING(le, ev->pl_u.pl_i.pl_cpuid, PMC_CPUID_LEN); ps->ps_version = ev->pl_u.pl_i.pl_version; ps->ps_arch = ev->pl_u.pl_i.pl_arch; ps->ps_initialized = 1; break; case PMCLOG_TYPE_MAP_IN: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in); PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid); PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start); PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen); break; case PMCLOG_TYPE_MAP_OUT: PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid); PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start); PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end); break; case PMCLOG_TYPE_PMCALLOCATE: PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event); PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags); PMCLOG_READ32(le,noop); ev->pl_u.pl_a.pl_evname = pmc_pmu_event_get_by_idx(ev->pl_u.pl_a.pl_event); if (ev->pl_u.pl_a.pl_evname != NULL) break; else if ((ev->pl_u.pl_a.pl_evname = _pmc_name_of_event(ev->pl_u.pl_a.pl_event, ps->ps_arch)) == NULL) { printf("unknown event\n"); goto error; } break; case PMCLOG_TYPE_PMCALLOCATEDYN: PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_event); PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_flags); PMCLOG_READSTRING(le,ev->pl_u.pl_ad.pl_evname,PMC_NAME_MAX); break; case PMCLOG_TYPE_PMCATTACH: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach); PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid); PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen); break; case PMCLOG_TYPE_PMCDETACH: PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid); break; case PMCLOG_TYPE_PROCCSW: PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid); PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_c.pl_tid); break; case PMCLOG_TYPE_PROCEXEC: PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec); PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid); PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid); PMCLOG_READ32(le,noop); PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_entryaddr); PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen); break; case PMCLOG_TYPE_PROCEXIT: PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid); PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid); PMCLOG_READ32(le,noop); PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value); break; case PMCLOG_TYPE_PROCFORK: PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid); PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid); break; case PMCLOG_TYPE_SYSEXIT: PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid); break; case PMCLOG_TYPE_USERDATA: PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata); break; default: /* unknown record type */ ps->ps_state = PL_STATE_ERROR; ev->pl_state = PMCLOG_ERROR; return (-1); } ev->pl_offset = (ps->ps_offset += evlen); ev->pl_count = (ps->ps_count += 1); ev->pl_state = PMCLOG_OK; return 0; error: ev->pl_state = PMCLOG_ERROR; ps->ps_state = PL_STATE_ERROR; return -1; } /* * Extract and return the next event from the byte stream. * * Returns 0 and sets the event's state to PMCLOG_OK in case an event * was successfully parsed. Otherwise this function returns -1 and * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if * a parse error was encountered. */ int pmclog_read(void *cookie, struct pmclog_ev *ev) { int retval; ssize_t nread; struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (ps->ps_state == PL_STATE_ERROR) { ev->pl_state = PMCLOG_ERROR; return -1; } /* * If there isn't enough data left for a new event try and get * more data. */ if (ps->ps_len == 0) { ev->pl_state = PMCLOG_REQUIRE_DATA; /* * If we have a valid file descriptor to read from, attempt * to read from that. This read may return with an error, * (which may be EAGAIN or other recoverable error), or * can return EOF. */ if (ps->ps_fd != PMCLOG_FD_NONE) { refill: nread = read(ps->ps_fd, ps->ps_buffer, PMCLOG_BUFFER_SIZE); if (nread <= 0) { if (nread == 0) ev->pl_state = PMCLOG_EOF; else if (errno != EAGAIN) /* not restartable */ ev->pl_state = PMCLOG_ERROR; return -1; } ps->ps_len = nread; ps->ps_data = ps->ps_buffer; } else { return -1; } } assert(ps->ps_len > 0); /* Retrieve one event from the byte stream. */ retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev); /* * If we need more data and we have a configured fd, try read * from it. */ if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA && ps->ps_fd != -1) { assert(ps->ps_len == 0); goto refill; } return retval; } /* * Feed data to a memory based parser. * * The memory area pointed to by 'data' needs to be valid till the * next error return from pmclog_next_event(). */ int pmclog_feed(void *cookie, char *data, int len) { struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (len < 0 || /* invalid length */ ps->ps_buffer || /* called for a file parser */ ps->ps_len != 0) /* unnecessary call */ return -1; ps->ps_data = data; ps->ps_len = len; return 0; } /* * Allocate and initialize parser state. */ void * pmclog_open(int fd) { struct pmclog_parse_state *ps; if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL) return NULL; ps->ps_state = PL_STATE_NEW_RECORD; ps->ps_arch = -1; ps->ps_initialized = 0; ps->ps_count = 0; ps->ps_offset = (off_t) 0; bzero(&ps->ps_saved, sizeof(ps->ps_saved)); ps->ps_svcount = 0; ps->ps_fd = fd; ps->ps_data = NULL; ps->ps_buffer = NULL; ps->ps_len = 0; /* allocate space for a work area */ if (ps->ps_fd != PMCLOG_FD_NONE) { if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) { free(ps); return NULL; } } return ps; } /* * Free up parser state. */ void pmclog_close(void *cookie) { struct pmclog_parse_state *ps; ps = (struct pmclog_parse_state *) cookie; if (ps->ps_buffer) free(ps->ps_buffer); free(ps); } Index: head/lib/libpmc/pmclog.h =================================================================== --- head/lib/libpmc/pmclog.h (revision 334595) +++ head/lib/libpmc/pmclog.h (revision 334596) @@ -1,184 +1,185 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _PMCLOG_H_ #define _PMCLOG_H_ #include #include enum pmclog_state { PMCLOG_OK, PMCLOG_EOF, PMCLOG_REQUIRE_DATA, PMCLOG_ERROR }; struct pmclog_ev_callchain { uint32_t pl_pid; uint32_t pl_tid; uint32_t pl_pmcid; uint32_t pl_cpuflags; uint32_t pl_cpuflags2; uint32_t pl_npc; uintfptr_t pl_pc[PMC_CALLCHAIN_DEPTH_MAX]; }; struct pmclog_ev_dropnotify { }; struct pmclog_ev_closelog { }; struct pmclog_ev_initialize { uint32_t pl_version; uint32_t pl_arch; + char pl_cpuid[PATH_MAX]; }; struct pmclog_ev_map_in { pid_t pl_pid; uintfptr_t pl_start; char pl_pathname[PATH_MAX]; }; struct pmclog_ev_map_out { pid_t pl_pid; uintfptr_t pl_start; uintfptr_t pl_end; }; struct pmclog_ev_pcsample { uintfptr_t pl_pc; pid_t pl_pid; pid_t pl_tid; pmc_id_t pl_pmcid; uint32_t pl_flags; uint32_t pl_usermode; }; struct pmclog_ev_pmcallocate { uint32_t pl_event; const char * pl_evname; uint32_t pl_flags; pmc_id_t pl_pmcid; }; struct pmclog_ev_pmcallocatedyn { uint32_t pl_event; char pl_evname[PMC_NAME_MAX]; uint32_t pl_flags; pmc_id_t pl_pmcid; }; struct pmclog_ev_pmcattach { pmc_id_t pl_pmcid; pid_t pl_pid; char pl_pathname[PATH_MAX]; }; struct pmclog_ev_pmcdetach { pmc_id_t pl_pmcid; pid_t pl_pid; }; struct pmclog_ev_proccsw { pid_t pl_pid; pid_t pl_tid; pmc_id_t pl_pmcid; pmc_value_t pl_value; }; struct pmclog_ev_procexec { pid_t pl_pid; pmc_id_t pl_pmcid; uintfptr_t pl_entryaddr; char pl_pathname[PATH_MAX]; }; struct pmclog_ev_procexit { uint32_t pl_pid; pmc_id_t pl_pmcid; pmc_value_t pl_value; }; struct pmclog_ev_procfork { pid_t pl_oldpid; pid_t pl_newpid; }; struct pmclog_ev_sysexit { pid_t pl_pid; }; struct pmclog_ev_userdata { uint32_t pl_userdata; }; struct pmclog_ev { enum pmclog_state pl_state; /* state after 'get_event()' */ off_t pl_offset; /* byte offset in stream */ size_t pl_count; /* count of records so far */ struct timespec pl_ts; /* log entry timestamp */ enum pmclog_type pl_type; /* type of log entry */ union { /* log entry data */ struct pmclog_ev_callchain pl_cc; struct pmclog_ev_closelog pl_cl; struct pmclog_ev_dropnotify pl_dn; struct pmclog_ev_initialize pl_i; struct pmclog_ev_map_in pl_mi; struct pmclog_ev_map_out pl_mo; struct pmclog_ev_pmcallocate pl_a; struct pmclog_ev_pmcallocatedyn pl_ad; struct pmclog_ev_pmcattach pl_t; struct pmclog_ev_pmcdetach pl_d; struct pmclog_ev_proccsw pl_c; struct pmclog_ev_procexec pl_x; struct pmclog_ev_procexit pl_e; struct pmclog_ev_procfork pl_f; struct pmclog_ev_sysexit pl_se; struct pmclog_ev_userdata pl_u; } pl_u; }; #define PMCLOG_FD_NONE (-1) __BEGIN_DECLS void *pmclog_open(int _fd); int pmclog_feed(void *_cookie, char *_data, int _len); int pmclog_read(void *_cookie, struct pmclog_ev *_ev); void pmclog_close(void *_cookie); __END_DECLS #endif Index: head/sys/amd64/include/pmc_mdep.h =================================================================== --- head/sys/amd64/include/pmc_mdep.h (revision 334595) +++ head/sys/amd64/include/pmc_mdep.h (revision 334596) @@ -1,140 +1,139 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* Machine dependent interfaces */ #ifndef _MACHINE_PMC_MDEP_H #define _MACHINE_PMC_MDEP_H 1 #ifdef _KERNEL struct pmc_mdep; #endif #include #include #include #include /* * Intel processors implementing V2 and later of the Intel performance * measurement architecture have PMCs of the following classes: TSC, * IAF, IAP, UCF and UCP. */ #define PMC_MDEP_CLASS_INDEX_TSC 1 #define PMC_MDEP_CLASS_INDEX_K8 2 #define PMC_MDEP_CLASS_INDEX_P4 2 #define PMC_MDEP_CLASS_INDEX_IAP 2 #define PMC_MDEP_CLASS_INDEX_IAF 3 #define PMC_MDEP_CLASS_INDEX_UCP 4 #define PMC_MDEP_CLASS_INDEX_UCF 5 /* * On the amd64 platform we support the following PMCs. * * TSC The timestamp counter * K8 AMD Athlon64 and Opteron PMCs in 64 bit mode. * PIV Intel P4/HTT and P4/EMT64 * IAP Intel Core/Core2/Atom CPUs in 64 bits mode. * IAF Intel fixed-function PMCs in Core2 and later CPUs. * UCP Intel Uncore programmable PMCs. * UCF Intel Uncore fixed-function PMCs. */ union pmc_md_op_pmcallocate { struct pmc_md_amd_op_pmcallocate pm_amd; - struct pmc_md_iaf_op_pmcallocate pm_iaf; struct pmc_md_iap_op_pmcallocate pm_iap; struct pmc_md_ucf_op_pmcallocate pm_ucf; struct pmc_md_ucp_op_pmcallocate pm_ucp; uint64_t __pad[4]; }; /* Logging */ #define PMCLOG_READADDR PMCLOG_READ64 #define PMCLOG_EMITADDR PMCLOG_EMIT64 #ifdef _KERNEL union pmc_md_pmc { struct pmc_md_amd_pmc pm_amd; struct pmc_md_iaf_pmc pm_iaf; struct pmc_md_iap_pmc pm_iap; struct pmc_md_ucf_pmc pm_ucf; struct pmc_md_ucp_pmc pm_ucp; }; #define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_rip) #define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_rbp) #define PMC_TRAPFRAME_TO_USER_SP(TF) ((TF)->tf_rsp) #define PMC_TRAPFRAME_TO_KERNEL_SP(TF) ((TF)->tf_rsp) #define PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I) \ (((I) & 0xffffffff) == 0xe5894855) /* pushq %rbp; movq %rsp,%rbp */ #define PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I) \ (((I) & 0x00ffffff) == 0x00e58948) /* movq %rsp,%rbp */ #define PMC_AT_FUNCTION_EPILOGUE_RET(I) \ (((I) & 0xFF) == 0xC3) /* ret */ #define PMC_IN_TRAP_HANDLER(PC) \ ((PC) >= (uintptr_t) start_exceptions && \ (PC) < (uintptr_t) end_exceptions) #define PMC_IN_KERNEL_STACK(S,START,END) \ ((S) >= (START) && (S) < (END)) #define PMC_IN_KERNEL(va) INKERNEL(va) #define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS) /* Build a fake kernel trapframe from current instruction pointer. */ #define PMC_FAKE_TRAPFRAME(TF) \ do { \ (TF)->tf_cs = 0; (TF)->tf_rflags = 0; \ __asm __volatile("movq %%rbp,%0" : "=r" ((TF)->tf_rbp)); \ __asm __volatile("movq %%rsp,%0" : "=r" ((TF)->tf_rsp)); \ __asm __volatile("call 1f \n\t1: pop %0" : "=r"((TF)->tf_rip)); \ } while (0) /* * Prototypes */ void start_exceptions(void), end_exceptions(void); struct pmc_mdep *pmc_amd_initialize(void); void pmc_amd_finalize(struct pmc_mdep *_md); struct pmc_mdep *pmc_intel_initialize(void); void pmc_intel_finalize(struct pmc_mdep *_md); #endif /* _KERNEL */ #endif /* _MACHINE_PMC_MDEP_H */ Index: head/sys/dev/hwpmc/hwpmc_core.c =================================================================== --- head/sys/dev/hwpmc/hwpmc_core.c (revision 334595) +++ head/sys/dev/hwpmc/hwpmc_core.c (revision 334596) @@ -1,1322 +1,1336 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Intel Core PMCs. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #if (__FreeBSD_version >= 1100000) #include #else #include #endif #include #include #include #include #define CORE_CPUID_REQUEST 0xA #define CORE_CPUID_REQUEST_SIZE 0x4 #define CORE_CPUID_EAX 0x0 #define CORE_CPUID_EBX 0x1 #define CORE_CPUID_ECX 0x2 #define CORE_CPUID_EDX 0x3 #define IAF_PMC_CAPS \ (PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \ PMC_CAP_USER | PMC_CAP_SYSTEM) #define IAF_RI_TO_MSR(RI) ((RI) + (1 << 30)) #define IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \ PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \ PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE) #define EV_IS_NOTARCH 0 #define EV_IS_ARCH_SUPP 1 #define EV_IS_ARCH_NOTSUPP -1 /* * "Architectural" events defined by Intel. The values of these * symbols correspond to positions in the bitmask returned by * the CPUID.0AH instruction. */ enum core_arch_events { CORE_AE_BRANCH_INSTRUCTION_RETIRED = 5, CORE_AE_BRANCH_MISSES_RETIRED = 6, CORE_AE_INSTRUCTION_RETIRED = 1, CORE_AE_LLC_MISSES = 4, CORE_AE_LLC_REFERENCE = 3, CORE_AE_UNHALTED_REFERENCE_CYCLES = 2, CORE_AE_UNHALTED_CORE_CYCLES = 0 }; static enum pmc_cputype core_cputype; struct core_cpu { volatile uint32_t pc_resync; volatile uint32_t pc_iafctrl; /* Fixed function control. */ volatile uint64_t pc_globalctrl; /* Global control register. */ struct pmc_hw pc_corepmcs[]; }; static struct core_cpu **core_pcpu; static uint32_t core_architectural_events; static uint64_t core_pmcmask; static int core_iaf_ri; /* relative index of fixed counters */ static int core_iaf_width; static int core_iaf_npmc; static int core_iap_width; static int core_iap_npmc; static int core_iap_wroffset; static int core_pcpu_noop(struct pmc_mdep *md, int cpu) { (void) md; (void) cpu; return (0); } static int core_pcpu_init(struct pmc_mdep *md, int cpu) { struct pmc_cpu *pc; struct core_cpu *cc; struct pmc_hw *phw; int core_ri, n, npmc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[iaf,%d] insane cpu number %d", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu); core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri; npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num; if (core_cputype != PMC_CPU_INTEL_CORE) npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num; cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw), M_PMC, M_WAITOK | M_ZERO); core_pcpu[cpu] = cc; pc = pmc_pcpu[cpu]; KASSERT(pc != NULL && cc != NULL, ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu)); for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) { phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n + core_ri); phw->phw_pmc = NULL; pc->pc_hwpmcs[n + core_ri] = phw; } return (0); } static int core_pcpu_fini(struct pmc_mdep *md, int cpu) { int core_ri, n, npmc; struct pmc_cpu *pc; struct core_cpu *cc; uint64_t msr = 0; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] insane cpu number (%d)", __LINE__, cpu)); PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu); if ((cc = core_pcpu[cpu]) == NULL) return (0); core_pcpu[cpu] = NULL; pc = pmc_pcpu[cpu]; KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__, cpu)); npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num; core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri; for (n = 0; n < npmc; n++) { msr = rdmsr(IAP_EVSEL0 + n) & ~IAP_EVSEL_MASK; wrmsr(IAP_EVSEL0 + n, msr); } if (core_cputype != PMC_CPU_INTEL_CORE) { msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK; wrmsr(IAF_CTRL, msr); npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num; } for (n = 0; n < npmc; n++) pc->pc_hwpmcs[n + core_ri] = NULL; free(cc, M_PMC); return (0); } /* * Fixed function counters. */ static pmc_value_t iaf_perfctr_value_to_reload_count(pmc_value_t v) { /* If the PMC has overflowed, return a reload count of zero. */ if ((v & (1ULL << (core_iaf_width - 1))) == 0) return (0); v &= (1ULL << core_iaf_width) - 1; return (1ULL << core_iaf_width) - v; } static pmc_value_t iaf_reload_count_to_perfctr_value(pmc_value_t rlc) { return (1ULL << core_iaf_width) - rlc; } static int iaf_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { - enum pmc_event ev; - uint32_t caps, flags, validflags; + uint8_t ev, umask; + uint32_t caps, flags, config; + const struct pmc_md_iap_op_pmcallocate *iap; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps); if (ri < 0 || ri > core_iaf_npmc) return (EINVAL); caps = a->pm_caps; if (a->pm_class != PMC_CLASS_IAF || (caps & IAF_PMC_CAPS) != caps) return (EINVAL); - ev = pm->pm_event; + iap = &a->pm_md.pm_iap; + config = iap->pm_iap_config; + ev = IAP_EVSEL_GET(config); + umask = IAP_UMASK_GET(config); - - if (ev == PMC_EV_IAF_INSTR_RETIRED_ANY && ri != 0) + /* INST_RETIRED.ANY */ + if (ev == 0xC0 && ri != 0) return (EINVAL); - if (ev == PMC_EV_IAF_CPU_CLK_UNHALTED_CORE && ri != 1) + /* CPU_CLK_UNHALTED.THREAD */ + else if (ev == 0x3C && ri != 1) return (EINVAL); - if (ev == PMC_EV_IAF_CPU_CLK_UNHALTED_REF && ri != 2) + /* CPU_CLK_UNHALTED.REF */ + else if (ev == 0x0 && umask == 0x3 && ri != 2) return (EINVAL); + else + return (EINVAL); - flags = a->pm_md.pm_iaf.pm_iaf_flags; - - validflags = IAF_MASK; + flags = 0; + if (config & IAP_OS) + flags |= IAF_OS; + if (config & IAP_USR) + flags |= IAF_USR; + if (config & IAP_ANY) + flags |= IAF_ANY; + if (config & IAP_INT) + flags |= IAF_PMI; if (caps & PMC_CAP_INTERRUPT) flags |= IAF_PMI; if (caps & PMC_CAP_SYSTEM) flags |= IAF_OS; if (caps & PMC_CAP_USER) flags |= IAF_USR; if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0) flags |= (IAF_OS | IAF_USR); pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4)); PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx", (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl); return (0); } static int iaf_config_pmc(int cpu, int ri, struct pmc *pm) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__, cpu)); core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm; return (0); } static int iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { int error; struct pmc_hw *phw; char iaf_name[PMC_NAME_MAX]; phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri]; (void) snprintf(iaf_name, sizeof(iaf_name), "IAF-%d", ri); if ((error = copystr(iaf_name, pi->pm_name, PMC_NAME_MAX, NULL)) != 0) return (error); pi->pm_class = PMC_CLASS_IAF; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int iaf_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc; return (0); } static int iaf_get_msr(int ri, uint32_t *msr) { KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[iaf,%d] ri %d out of range", __LINE__, ri)); *msr = IAF_RI_TO_MSR(ri); return (0); } static int iaf_read_pmc(int cpu, int ri, pmc_value_t *v) { struct pmc *pm; pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); pm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc; KASSERT(pm, ("[core,%d] cpu %d ri %d(%d) pmc not configured", __LINE__, cpu, ri, ri + core_iaf_ri)); tmp = rdpmc(IAF_RI_TO_MSR(ri)); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) *v = iaf_perfctr_value_to_reload_count(tmp); else *v = tmp & ((1ULL << core_iaf_width) - 1); PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri, IAF_RI_TO_MSR(ri), *v); return (0); } static int iaf_release_pmc(int cpu, int ri, struct pmc *pmc) { PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__)); return (0); } static int iaf_start_pmc(int cpu, int ri) { struct pmc *pm; struct core_cpu *iafc; uint64_t msr = 0; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri); iafc = core_pcpu[cpu]; pm = iafc->pc_corepmcs[ri + core_iaf_ri].phw_pmc; iafc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl; msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK; wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK)); do { iafc->pc_resync = 0; iafc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET)); msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK; wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl & IAF_GLOBAL_CTRL_MASK)); } while (iafc->pc_resync != 0); PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)", iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL), iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL)); return (0); } static int iaf_stop_pmc(int cpu, int ri) { uint32_t fc; struct core_cpu *iafc; uint64_t msr = 0; PMCDBG2(MDP,STO,1,"iaf-stop cpu=%d ri=%d", cpu, ri); iafc = core_pcpu[cpu]; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); fc = (IAF_MASK << (ri * 4)); iafc->pc_iafctrl &= ~fc; PMCDBG1(MDP,STO,1,"iaf-stop iafctrl=%x", iafc->pc_iafctrl); msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK; wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK)); do { iafc->pc_resync = 0; iafc->pc_globalctrl &= ~(1ULL << (ri + IAF_OFFSET)); msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK; wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl & IAF_GLOBAL_CTRL_MASK)); } while (iafc->pc_resync != 0); PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)", iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL), iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL)); return (0); } static int iaf_write_pmc(int cpu, int ri, pmc_value_t v) { struct core_cpu *cc; struct pmc *pm; uint64_t msr; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iaf_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); cc = core_pcpu[cpu]; pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc; KASSERT(pm, ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri)); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = iaf_reload_count_to_perfctr_value(v); /* Turn off fixed counters */ msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK; wrmsr(IAF_CTRL, msr); wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1)); /* Turn on fixed counters */ msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK; wrmsr(IAF_CTRL, msr | (cc->pc_iafctrl & IAF_CTRL_MASK)); PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx " "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v, (uintmax_t) rdmsr(IAF_CTRL), (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri))); return (0); } static void iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth) { struct pmc_classdep *pcd; KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__)); PMCDBG0(MDP,INI,1, "iaf-initialize"); pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF]; pcd->pcd_caps = IAF_PMC_CAPS; pcd->pcd_class = PMC_CLASS_IAF; pcd->pcd_num = npmc; pcd->pcd_ri = md->pmd_npmc; pcd->pcd_width = pmcwidth; pcd->pcd_allocate_pmc = iaf_allocate_pmc; pcd->pcd_config_pmc = iaf_config_pmc; pcd->pcd_describe = iaf_describe; pcd->pcd_get_config = iaf_get_config; pcd->pcd_get_msr = iaf_get_msr; pcd->pcd_pcpu_fini = core_pcpu_noop; pcd->pcd_pcpu_init = core_pcpu_noop; pcd->pcd_read_pmc = iaf_read_pmc; pcd->pcd_release_pmc = iaf_release_pmc; pcd->pcd_start_pmc = iaf_start_pmc; pcd->pcd_stop_pmc = iaf_stop_pmc; pcd->pcd_write_pmc = iaf_write_pmc; md->pmd_npmc += npmc; } /* * Intel programmable PMCs. */ /* Sub fields of UMASK that this event supports. */ #define IAP_M_CORE (1 << 0) /* Core specificity */ #define IAP_M_AGENT (1 << 1) /* Agent specificity */ #define IAP_M_PREFETCH (1 << 2) /* Prefetch */ #define IAP_M_MESI (1 << 3) /* MESI */ #define IAP_M_SNOOPRESPONSE (1 << 4) /* Snoop response */ #define IAP_M_SNOOPTYPE (1 << 5) /* Snoop type */ #define IAP_M_TRANSITION (1 << 6) /* Transition */ #define IAP_F_CORE (0x3 << 14) /* Core specificity */ #define IAP_F_AGENT (0x1 << 13) /* Agent specificity */ #define IAP_F_PREFETCH (0x3 << 12) /* Prefetch */ #define IAP_F_MESI (0xF << 8) /* MESI */ #define IAP_F_SNOOPRESPONSE (0xB << 8) /* Snoop response */ #define IAP_F_SNOOPTYPE (0x3 << 8) /* Snoop type */ #define IAP_F_TRANSITION (0x1 << 12) /* Transition */ #define IAP_PREFETCH_RESERVED (0x2 << 12) #define IAP_CORE_THIS (0x1 << 14) #define IAP_CORE_ALL (0x3 << 14) #define IAP_F_CMASK 0xFF000000 static pmc_value_t iap_perfctr_value_to_reload_count(pmc_value_t v) { /* If the PMC has overflowed, return a reload count of zero. */ if ((v & (1ULL << (core_iap_width - 1))) == 0) return (0); v &= (1ULL << core_iap_width) - 1; return (1ULL << core_iap_width) - v; } static pmc_value_t iap_reload_count_to_perfctr_value(pmc_value_t rlc) { return (1ULL << core_iap_width) - rlc; } static int iap_pmc_has_overflowed(int ri) { uint64_t v; /* * We treat a Core (i.e., Intel architecture v1) PMC as has * having overflowed if its MSB is zero. */ v = rdpmc(ri); return ((v & (1ULL << (core_iap_width - 1))) == 0); } static int iap_event_corei7_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* * Events valid only on counter 0, 1. */ case 0x40: case 0x41: case 0x42: case 0x43: case 0x51: case 0x63: mask = 0x3; break; default: mask = ~0; /* Any row index is ok. */ } return (mask & (1 << ri)); } static int iap_event_westmere_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* * Events valid only on counter 0. */ case 0x60: case 0xB3: mask = 0x1; break; /* * Events valid only on counter 0, 1. */ case 0x4C: case 0x4E: case 0x51: case 0x63: mask = 0x3; break; default: mask = ~0; /* Any row index is ok. */ } return (mask & (1 << ri)); } static int iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* Events valid only on counter 0. */ case 0xB7: mask = 0x1; break; /* Events valid only on counter 1. */ case 0xC0: mask = 0x2; break; /* Events valid only on counter 2. */ case 0x48: case 0xA2: case 0xA3: mask = 0x4; break; /* Events valid only on counter 3. */ case 0xBB: case 0xCD: mask = 0x8; break; default: mask = ~0; /* Any row index is ok. */ } return (mask & (1 << ri)); } static int iap_event_ok_on_counter(uint8_t evsel, int ri) { uint32_t mask; switch (evsel) { /* * Events valid only on counter 0. */ case 0x10: case 0x14: case 0x18: case 0xB3: case 0xC1: case 0xCB: mask = (1 << 0); break; /* * Events valid only on counter 1. */ case 0x11: case 0x12: case 0x13: mask = (1 << 1); break; default: mask = ~0; /* Any row index is ok. */ } return (mask & (1 << ri)); } static int iap_allocate_pmc(int cpu, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { enum pmc_event map; uint8_t ev; uint32_t caps; const struct pmc_md_iap_op_pmcallocate *iap; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index value %d", __LINE__, ri)); /* check requested capabilities */ caps = a->pm_caps; if ((IAP_PMC_CAPS & caps) != caps) return (EPERM); map = 0; /* XXX: silent GCC warning */ iap = &a->pm_md.pm_iap; ev = IAP_EVSEL_GET(iap->pm_iap_config); switch (core_cputype) { case PMC_CPU_INTEL_COREI7: case PMC_CPU_INTEL_NEHALEM_EX: if (iap_event_corei7_ok_on_counter(ev, ri) == 0) return (EINVAL); break; case PMC_CPU_INTEL_SKYLAKE: case PMC_CPU_INTEL_SKYLAKE_XEON: case PMC_CPU_INTEL_BROADWELL: case PMC_CPU_INTEL_BROADWELL_XEON: case PMC_CPU_INTEL_SANDYBRIDGE: case PMC_CPU_INTEL_SANDYBRIDGE_XEON: case PMC_CPU_INTEL_IVYBRIDGE: case PMC_CPU_INTEL_IVYBRIDGE_XEON: case PMC_CPU_INTEL_HASWELL: case PMC_CPU_INTEL_HASWELL_XEON: if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0) return (EINVAL); break; case PMC_CPU_INTEL_WESTMERE: case PMC_CPU_INTEL_WESTMERE_EX: if (iap_event_westmere_ok_on_counter(ev, ri) == 0) return (EINVAL); break; default: if (iap_event_ok_on_counter(ev, ri) == 0) return (EINVAL); } pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config; return (0); } static int iap_config_pmc(int cpu, int ri, struct pmc *pm) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__, cpu)); core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm; return (0); } static int iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) { int error; struct pmc_hw *phw; char iap_name[PMC_NAME_MAX]; phw = &core_pcpu[cpu]->pc_corepmcs[ri]; (void) snprintf(iap_name, sizeof(iap_name), "IAP-%d", ri); if ((error = copystr(iap_name, pi->pm_name, PMC_NAME_MAX, NULL)) != 0) return (error); pi->pm_class = PMC_CLASS_IAP; if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { pi->pm_enabled = TRUE; *ppmc = phw->phw_pmc; } else { pi->pm_enabled = FALSE; *ppmc = NULL; } return (0); } static int iap_get_config(int cpu, int ri, struct pmc **ppm) { *ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc; return (0); } static int iap_get_msr(int ri, uint32_t *msr) { KASSERT(ri >= 0 && ri < core_iap_npmc, ("[iap,%d] ri %d out of range", __LINE__, ri)); *msr = ri; return (0); } static int iap_read_pmc(int cpu, int ri, pmc_value_t *v) { struct pmc *pm; pmc_value_t tmp; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); pm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc; KASSERT(pm, ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri)); tmp = rdpmc(ri); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) *v = iap_perfctr_value_to_reload_count(tmp); else *v = tmp & ((1ULL << core_iap_width) - 1); PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri, IAP_PMC0 + ri, *v); return (0); } static int iap_release_pmc(int cpu, int ri, struct pmc *pm) { (void) pm; PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri, pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__)); return (0); } static int iap_start_pmc(int cpu, int ri) { struct pmc *pm; uint32_t evsel; struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal CPU value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row-index %d", __LINE__, ri)); cc = core_pcpu[cpu]; pm = cc->pc_corepmcs[ri].phw_pmc; KASSERT(pm, ("[core,%d] starting cpu%d,ri%d with no pmc configured", __LINE__, cpu, ri)); PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri); evsel = pm->pm_md.pm_iap.pm_iap_evsel; PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x", cpu, ri, IAP_EVSEL0 + ri, evsel); /* Event specific configuration. */ switch (IAP_EVSEL_GET(evsel)) { case 0xB7: wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp); break; case 0xBB: wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp); break; default: break; } wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN); if (core_cputype == PMC_CPU_INTEL_CORE) return (0); do { cc->pc_resync = 0; cc->pc_globalctrl |= (1ULL << ri); wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl); } while (cc->pc_resync != 0); return (0); } static int iap_stop_pmc(int cpu, int ri) { struct pmc *pm; struct core_cpu *cc; uint64_t msr; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row index %d", __LINE__, ri)); cc = core_pcpu[cpu]; pm = cc->pc_corepmcs[ri].phw_pmc; KASSERT(pm, ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__, cpu, ri)); PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri); msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK; wrmsr(IAP_EVSEL0 + ri, msr); /* stop hw */ if (core_cputype == PMC_CPU_INTEL_CORE) return (0); msr = 0; do { cc->pc_resync = 0; cc->pc_globalctrl &= ~(1ULL << ri); msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK; wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl); } while (cc->pc_resync != 0); return (0); } static int iap_write_pmc(int cpu, int ri, pmc_value_t v) { struct pmc *pm; struct core_cpu *cc; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[core,%d] illegal cpu value %d", __LINE__, cpu)); KASSERT(ri >= 0 && ri < core_iap_npmc, ("[core,%d] illegal row index %d", __LINE__, ri)); cc = core_pcpu[cpu]; pm = cc->pc_corepmcs[ri].phw_pmc; KASSERT(pm, ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__, cpu, ri)); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) v = iap_reload_count_to_perfctr_value(v); v &= (1ULL << core_iap_width) - 1; PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri, IAP_PMC0 + ri, v); /* * Write the new value to the counter (or it's alias). The * counter will be in a stopped state when the pcd_write() * entry point is called. */ wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v); return (0); } static void iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth, int flags) { struct pmc_classdep *pcd; KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__)); PMCDBG0(MDP,INI,1, "iap-initialize"); /* Remember the set of architectural events supported. */ core_architectural_events = ~flags; pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP]; pcd->pcd_caps = IAP_PMC_CAPS; pcd->pcd_class = PMC_CLASS_IAP; pcd->pcd_num = npmc; pcd->pcd_ri = md->pmd_npmc; pcd->pcd_width = pmcwidth; pcd->pcd_allocate_pmc = iap_allocate_pmc; pcd->pcd_config_pmc = iap_config_pmc; pcd->pcd_describe = iap_describe; pcd->pcd_get_config = iap_get_config; pcd->pcd_get_msr = iap_get_msr; pcd->pcd_pcpu_fini = core_pcpu_fini; pcd->pcd_pcpu_init = core_pcpu_init; pcd->pcd_read_pmc = iap_read_pmc; pcd->pcd_release_pmc = iap_release_pmc; pcd->pcd_start_pmc = iap_start_pmc; pcd->pcd_stop_pmc = iap_stop_pmc; pcd->pcd_write_pmc = iap_write_pmc; md->pmd_npmc += npmc; } static int core_intr(int cpu, struct trapframe *tf) { pmc_value_t v; struct pmc *pm; struct core_cpu *cc; int error, found_interrupt, ri; uint64_t msr; PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); found_interrupt = 0; cc = core_pcpu[cpu]; for (ri = 0; ri < core_iap_npmc; ri++) { if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; if (!iap_pmc_has_overflowed(ri)) continue; found_interrupt = 1; if (pm->pm_state != PMC_STATE_RUNNING) continue; error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); v = pm->pm_sc.pm_reloadcount; v = iap_reload_count_to_perfctr_value(v); /* * Stop the counter, reload it but only restart it if * the PMC is not stalled. */ msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK; wrmsr(IAP_EVSEL0 + ri, msr); wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v); if (error) continue; wrmsr(IAP_EVSEL0 + ri, msr | (pm->pm_md.pm_iap.pm_iap_evsel | IAP_EN)); } if (found_interrupt) lapic_reenable_pmc(); if (found_interrupt) counter_u64_add(pmc_stats.pm_intr_processed, 1); else counter_u64_add(pmc_stats.pm_intr_ignored, 1); return (found_interrupt); } static int core2_intr(int cpu, struct trapframe *tf) { int error, found_interrupt, n; uint64_t flag, intrstatus, intrenable, msr; struct pmc *pm; struct core_cpu *cc; pmc_value_t v; PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); /* * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which * PMCs have a pending PMI interrupt. We take a 'snapshot' of * the current set of interrupting PMCs and process these * after stopping them. */ intrstatus = rdmsr(IA_GLOBAL_STATUS); intrenable = intrstatus & core_pmcmask; PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu, (uintmax_t) intrstatus); found_interrupt = 0; cc = core_pcpu[cpu]; KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__)); cc->pc_globalctrl &= ~intrenable; cc->pc_resync = 1; /* MSRs now potentially out of sync. */ /* * Stop PMCs and clear overflow status bits. */ msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK; wrmsr(IA_GLOBAL_CTRL, msr); wrmsr(IA_GLOBAL_OVF_CTRL, intrenable | IA_GLOBAL_STATUS_FLAG_OVFBUF | IA_GLOBAL_STATUS_FLAG_CONDCHG); /* * Look for interrupts from fixed function PMCs. */ for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc; n++, flag <<= 1) { if ((intrstatus & flag) == 0) continue; found_interrupt = 1; pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc; if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); if (error) intrenable &= ~flag; v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount); /* Reload sampling count. */ wrmsr(IAF_CTR0 + n, v); PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", cpu, error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n))); } /* * Process interrupts from the programmable counters. */ for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) { if ((intrstatus & flag) == 0) continue; found_interrupt = 1; pm = cc->pc_corepmcs[n].phw_pmc; if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); if (error) intrenable &= ~flag; v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount); PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error, (uintmax_t) v); /* Reload sampling count. */ wrmsr(core_iap_wroffset + IAP_PMC0 + n, v); } /* * Reenable all non-stalled PMCs. */ PMCDBG2(MDP,INT, 1, "cpu=%d intrenable=%jx", cpu, (uintmax_t) intrenable); cc->pc_globalctrl |= intrenable; wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl & IA_GLOBAL_CTRL_MASK); PMCDBG5(MDP,INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx " "ovf=%jx", cpu, (uintmax_t) rdmsr(IAF_CTRL), (uintmax_t) rdmsr(IA_GLOBAL_CTRL), (uintmax_t) rdmsr(IA_GLOBAL_STATUS), (uintmax_t) rdmsr(IA_GLOBAL_OVF_CTRL)); if (found_interrupt) lapic_reenable_pmc(); if (found_interrupt) counter_u64_add(pmc_stats.pm_intr_processed, 1); else counter_u64_add(pmc_stats.pm_intr_ignored, 1); return (found_interrupt); } int pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override) { int cpuid[CORE_CPUID_REQUEST_SIZE]; int ipa_version, flags, nflags; do_cpuid(CORE_CPUID_REQUEST, cpuid); ipa_version = (version_override > 0) ? version_override : cpuid[CORE_CPUID_EAX] & 0xFF; core_cputype = md->pmd_cputype; PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d ipa-version=%d", core_cputype, maxcpu, ipa_version); if (ipa_version < 1 || ipa_version > 4 || (core_cputype != PMC_CPU_INTEL_CORE && ipa_version == 1)) { /* Unknown PMC architecture. */ printf("hwpc_core: unknown PMC architecture: %d\n", ipa_version); return (EPROGMISMATCH); } core_iap_wroffset = 0; if (cpu_feature2 & CPUID2_PDCM) { if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) { PMCDBG0(MDP, INI, 1, "core-init full-width write supported"); core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0; } else PMCDBG0(MDP, INI, 1, "core-init full-width write NOT supported"); } else PMCDBG0(MDP, INI, 1, "core-init pdcm not supported"); core_pmcmask = 0; /* * Initialize programmable counters. */ core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF; core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF; core_pmcmask |= ((1ULL << core_iap_npmc) - 1); nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF; flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1); iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags); /* * Initialize fixed function counters, if present. */ if (core_cputype != PMC_CPU_INTEL_CORE) { core_iaf_ri = core_iap_npmc; core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F; core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF; iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width); core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET; } PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask, core_iaf_ri); core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC, M_ZERO | M_WAITOK); /* * Choose the appropriate interrupt handler. */ if (ipa_version == 1) md->pmd_intr = core_intr; else md->pmd_intr = core2_intr; md->pmd_pcpu_fini = NULL; md->pmd_pcpu_init = NULL; return (0); } void pmc_core_finalize(struct pmc_mdep *md) { PMCDBG0(MDP,INI,1, "core-finalize"); free(core_pcpu, M_PMC); core_pcpu = NULL; } Index: head/sys/dev/hwpmc/hwpmc_core.h =================================================================== --- head/sys/dev/hwpmc/hwpmc_core.h (revision 334595) +++ head/sys/dev/hwpmc/hwpmc_core.h (revision 334596) @@ -1,204 +1,197 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _DEV_HWPMC_CORE_H_ #define _DEV_HWPMC_CORE_H_ 1 #define IA32_PERF_CAPABILITIES 0x345 #define PERFCAP_LBR_FORMAT 0x003f #define PERFCAP_PEBS_TRAP 0x0040 #define PERFCAP_PEBS_SAVEARCH 0x0080 #define PERFCAP_PEBS_RECFORMAT 0x0f00 #define PERFCAP_SMM_FREEZE 0x1000 #define PERFCAP_FW_WRITE 0x2000 /* full width write aliases */ -/* - * Fixed-function PMCs. - */ -struct pmc_md_iaf_op_pmcallocate { - uint16_t pm_iaf_flags; /* additional flags */ -}; - #define IAF_OS 0x1 #define IAF_USR 0x2 #define IAF_ANY 0x4 #define IAF_PMI 0x8 /* * Programmable PMCs. */ struct pmc_md_iap_op_pmcallocate { uint32_t pm_iap_config; uint64_t pm_iap_rsp; }; #define IAP_EVSEL(C) ((C) & 0xFF) #define IAP_UMASK(C) (((C) & 0xFF) << 8) #define IAP_USR (1 << 16) #define IAP_OS (1 << 17) #define IAP_EDGE (1 << 18) #define IAP_INT (1 << 20) #define IAP_ANY (1 << 21) #define IAP_EN (1 << 22) #define IAP_INV (1 << 23) #define IAP_CMASK(C) (((C) & 0xFF) << 24) #define IAP_EVSEL_GET(C) ((C) & 0xFF) #define IAP_UMASK_GET(C) (((C) & 0xFF00) >> 8) #define IA_OFFCORE_RSP_MASK_I7WM 0x000000F7FF #define IA_OFFCORE_RSP_MASK_SBIB 0x3F807F8FFF #ifdef _KERNEL /* * Fixed-function counters. */ #define IAF_MASK 0xF #define IAF_COUNTER_MASK 0x0000ffffffffffff #define IAF_CTR0 0x309 #define IAF_CTR1 0x30A #define IAF_CTR2 0x30B /* * The IAF_CTRL MSR is laid out in the following way. * * Bit Position Use * 63 - 12 Reserved (do not touch) * 11 Ctr 2 PMI * 10 Reserved (do not touch) * 9-8 Ctr 2 Enable * 7 Ctr 1 PMI * 6 Reserved (do not touch) * 5-4 Ctr 1 Enable * 3 Ctr 0 PMI * 2 Reserved (do not touch) * 1-0 Ctr 0 Enable (3: All Levels, 2: User, 1: OS, 0: Disable) */ #define IAF_OFFSET 32 #define IAF_CTRL 0x38D #define IAF_CTRL_MASK 0x0000000000000bbb /* * Programmable counters. */ #define IAP_PMC0 0x0C1 #define IAP_A_PMC0 0x4C1 /* * IAP_EVSEL(n) is laid out in the following way. * * Bit Position Use * 63-31 Reserved (do not touch) * 31-24 Counter Mask * 23 Invert * 22 Enable * 21 Reserved (do not touch) * 20 APIC Interrupt Enable * 19 Pin Control * 18 Edge Detect * 17 OS * 16 User * 15-8 Unit Mask * 7-0 Event Select */ #define IAP_EVSEL_MASK 0x00000000ffdfffff #define IAP_EVSEL0 0x186 /* * Simplified programming interface in Intel Performance Architecture * v2 and later. */ #define IA_GLOBAL_STATUS 0x38E #define IA_GLOBAL_CTRL 0x38F /* * IA_GLOBAL_CTRL is laid out in the following way. * * Bit Position Use * 63-35 Reserved (do not touch) * 34 IAF Counter 2 Enable * 33 IAF Counter 1 Enable * 32 IAF Counter 0 Enable * 31-0 Depends on programmable counters */ /* The mask is only for the fixed porttion of the register. */ #define IAF_GLOBAL_CTRL_MASK 0x0000000700000000 /* The mask is only for the programmable porttion of the register. */ #define IAP_GLOBAL_CTRL_MASK 0x00000000ffffffff /* The mask is for both the fixed and programmable porttions of the register. */ #define IA_GLOBAL_CTRL_MASK 0x00000007ffffffff #define IA_GLOBAL_OVF_CTRL 0x390 #define IA_GLOBAL_STATUS_FLAG_CONDCHG (1ULL << 63) #define IA_GLOBAL_STATUS_FLAG_OVFBUF (1ULL << 62) /* * Offcore response configuration. */ #define IA_OFFCORE_RSP0 0x1A6 #define IA_OFFCORE_RSP1 0x1A7 struct pmc_md_iaf_pmc { uint64_t pm_iaf_ctrl; }; struct pmc_md_iap_pmc { uint32_t pm_iap_evsel; uint64_t pm_iap_rsp; }; /* * Prototypes. */ int pmc_core_initialize(struct pmc_mdep *_md, int _maxcpu, int _version_override); void pmc_core_finalize(struct pmc_mdep *_md); int pmc_iaf_initialize(struct pmc_mdep *_md, int _maxcpu, int _npmc, int _width); void pmc_iaf_finalize(struct pmc_mdep *_md); int pmc_iap_initialize(struct pmc_mdep *_md, int _maxcpu, int _npmc, int _width, int _flags); void pmc_iap_finalize(struct pmc_mdep *_md); #endif /* _KERNEL */ #endif /* _DEV_HWPMC_CORE_H */ Index: head/sys/dev/hwpmc/hwpmc_logging.c =================================================================== --- head/sys/dev/hwpmc/hwpmc_logging.c (revision 334595) +++ head/sys/dev/hwpmc/hwpmc_logging.c (revision 334596) @@ -1,1231 +1,1232 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * Copyright (c) 2018 Matthew Macy * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Logging code for hwpmc(4) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NUMA #define NDOMAINS vm_ndomains #define curdomain PCPU_GET(domain) #else #define NDOMAINS 1 #define curdomain 0 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) #define free_domain(addr, type) free(addr, type) #endif /* * Sysctl tunables */ SYSCTL_DECL(_kern_hwpmc); /* * kern.hwpmc.logbuffersize -- size of the per-cpu owner buffers. */ static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; #if (__FreeBSD_version < 1100000) TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size); #endif SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_RDTUN, &pmclog_buffer_size, 0, "size of log buffers in kilobytes"); /* * kern.hwpmc.nbuffer -- number of global log buffers */ static int pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; #if (__FreeBSD_version < 1100000) TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers_pcpu); #endif SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers_pcpu, CTLFLAG_RDTUN, &pmc_nlogbuffers_pcpu, 0, "number of log buffers per cpu"); /* * Global log buffer list and associated spin lock. */ static struct mtx pmc_kthread_mtx; /* sleep lock */ #define PMCLOG_INIT_BUFFER_DESCRIPTOR(D, buf, domain) do { \ (D)->plb_fence = ((char *) (buf)) + 1024*pmclog_buffer_size; \ (D)->plb_base = (D)->plb_ptr = ((char *) (buf)); \ (D)->plb_domain = domain; \ } while (0) #define PMCLOG_RESET_BUFFER_DESCRIPTOR(D) do { \ (D)->plb_ptr = (D)->plb_base; \ } while (0) /* * Log file record constructors. */ #define _PMCLOG_TO_HEADER(T,L) \ ((PMCLOG_HEADER_MAGIC << 24) | \ (PMCLOG_TYPE_ ## T << 16) | \ ((L) & 0xFFFF)) /* reserve LEN bytes of space and initialize the entry header */ #define _PMCLOG_RESERVE_SAFE(PO,TYPE,LEN,ACTION) do { \ uint32_t *_le; \ int _len = roundup((LEN), sizeof(uint32_t)); \ if ((_le = pmclog_reserve((PO), _len)) == NULL) { \ ACTION; \ } \ *_le = _PMCLOG_TO_HEADER(TYPE,_len); \ _le += 3 /* skip over timestamp */ /* reserve LEN bytes of space and initialize the entry header */ #define _PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do { \ uint32_t *_le; \ int _len = roundup((LEN), sizeof(uint32_t)); \ spinlock_enter(); \ if ((_le = pmclog_reserve((PO), _len)) == NULL) { \ spinlock_exit(); \ ACTION; \ } \ *_le = _PMCLOG_TO_HEADER(TYPE,_len); \ _le += 3 /* skip over timestamp */ #define PMCLOG_RESERVE_SAFE(P,T,L) _PMCLOG_RESERVE_SAFE(P,T,L,return) #define PMCLOG_RESERVE(P,T,L) _PMCLOG_RESERVE(P,T,L,return) #define PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L, \ error=ENOMEM;goto error) #define PMCLOG_EMIT32(V) do { *_le++ = (V); } while (0) #define PMCLOG_EMIT64(V) do { \ *_le++ = (uint32_t) ((V) & 0xFFFFFFFF); \ *_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF); \ } while (0) /* Emit a string. Caution: does NOT update _le, so needs to be last */ #define PMCLOG_EMITSTRING(S,L) do { bcopy((S), _le, (L)); } while (0) #define PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0) #define PMCLOG_DESPATCH_SAFE(PO) \ pmclog_release((PO)); \ } while (0) #define PMCLOG_DESPATCH_SCHED_LOCK(PO) \ pmclog_release_flags((PO), 0); \ } while (0) #define PMCLOG_DESPATCH(PO) \ pmclog_release((PO)); \ spinlock_exit(); \ } while (0) #define PMCLOG_DESPATCH_SYNC(PO) \ pmclog_schedule_io((PO), 1); \ spinlock_exit(); \ } while (0) /* * Assertions about the log file format. */ CTASSERT(sizeof(struct pmclog_callchain) == 8*4 + PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_closelog) == 4*4); CTASSERT(sizeof(struct pmclog_dropnotify) == 4*4); CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX + 4*4 + sizeof(uintfptr_t)); CTASSERT(offsetof(struct pmclog_map_in,pl_pathname) == 4*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_map_out) == 4*4 + 2*sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4); CTASSERT(sizeof(struct pmclog_pmcattach) == 6*4 + PATH_MAX); CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 6*4); CTASSERT(sizeof(struct pmclog_pmcdetach) == 6*4); CTASSERT(sizeof(struct pmclog_proccsw) == 6*4 + 8); CTASSERT(sizeof(struct pmclog_procexec) == 6*4 + PATH_MAX + sizeof(uintfptr_t)); CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 6*4 + sizeof(uintfptr_t)); CTASSERT(sizeof(struct pmclog_procexit) == 6*4 + 8); CTASSERT(sizeof(struct pmclog_procfork) == 6*4); CTASSERT(sizeof(struct pmclog_sysexit) == 4*4); CTASSERT(sizeof(struct pmclog_userdata) == 4*4); /* * Log buffer structure */ struct pmclog_buffer { TAILQ_ENTRY(pmclog_buffer) plb_next; char *plb_base; char *plb_ptr; char *plb_fence; uint16_t plb_domain; } __aligned(CACHE_LINE_SIZE); /* * Prototypes */ static int pmclog_get_buffer(struct pmc_owner *po); static void pmclog_loop(void *arg); static void pmclog_release(struct pmc_owner *po); static uint32_t *pmclog_reserve(struct pmc_owner *po, int length); static void pmclog_schedule_io(struct pmc_owner *po, int wakeup); static void pmclog_schedule_all(struct pmc_owner *po); static void pmclog_stop_kthread(struct pmc_owner *po); /* * Helper functions */ static inline void pmc_plb_rele_unlocked(struct pmclog_buffer *plb) { TAILQ_INSERT_HEAD(&pmc_dom_hdrs[plb->plb_domain]->pdbh_head, plb, plb_next); } static inline void pmc_plb_rele(struct pmclog_buffer *plb) { mtx_lock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); pmc_plb_rele_unlocked(plb); mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); } /* * Get a log buffer */ static int pmclog_get_buffer(struct pmc_owner *po) { struct pmclog_buffer *plb; int domain; KASSERT(po->po_curbuf[curcpu] == NULL, ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po)); domain = curdomain; MPASS(pmc_dom_hdrs[domain]); mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); mtx_unlock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb); #ifdef HWPMC_DEBUG if (plb) KASSERT(plb->plb_ptr == plb->plb_base && plb->plb_base < plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p " "base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); #endif po->po_curbuf[curcpu] = plb; /* update stats */ counter_u64_add(pmc_stats.pm_buffer_requests, 1); if (plb == NULL) counter_u64_add(pmc_stats.pm_buffer_requests_failed, 1); return (plb ? 0 : ENOMEM); } struct pmclog_proc_init_args { struct proc *kthr; struct pmc_owner *po; bool exit; bool acted; }; int pmclog_proc_create(struct thread *td, void **handlep) { struct pmclog_proc_init_args *ia; int error; ia = malloc(sizeof(*ia), M_TEMP, M_WAITOK | M_ZERO); error = kproc_create(pmclog_loop, ia, &ia->kthr, RFHIGHPID, 0, "hwpmc: proc(%d)", td->td_proc->p_pid); if (error == 0) *handlep = ia; return (error); } void pmclog_proc_ignite(void *handle, struct pmc_owner *po) { struct pmclog_proc_init_args *ia; ia = handle; mtx_lock(&pmc_kthread_mtx); MPASS(!ia->acted); MPASS(ia->po == NULL); MPASS(!ia->exit); MPASS(ia->kthr != NULL); if (po == NULL) { ia->exit = true; } else { ia->po = po; KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread (%p) already present", __LINE__, po, po->po_kthread)); po->po_kthread = ia->kthr; } wakeup(ia); while (!ia->acted) msleep(ia, &pmc_kthread_mtx, PWAIT, "pmclogw", 0); mtx_unlock(&pmc_kthread_mtx); free(ia, M_TEMP); } /* * Log handler loop. * * This function is executed by each pmc owner's helper thread. */ static void pmclog_loop(void *arg) { struct pmclog_proc_init_args *ia; struct pmc_owner *po; struct pmclog_buffer *lb; struct proc *p; struct ucred *ownercred; struct ucred *mycred; struct thread *td; sigset_t unb; struct uio auio; struct iovec aiov; size_t nbytes; int error; td = curthread; SIGEMPTYSET(unb); SIGADDSET(unb, SIGHUP); (void)kern_sigprocmask(td, SIG_UNBLOCK, &unb, NULL, 0); ia = arg; MPASS(ia->kthr == curproc); MPASS(!ia->acted); mtx_lock(&pmc_kthread_mtx); while (ia->po == NULL && !ia->exit) msleep(ia, &pmc_kthread_mtx, PWAIT, "pmclogi", 0); if (ia->exit) { ia->acted = true; wakeup(ia); mtx_unlock(&pmc_kthread_mtx); kproc_exit(0); } MPASS(ia->po != NULL); po = ia->po; ia->acted = true; wakeup(ia); mtx_unlock(&pmc_kthread_mtx); ia = NULL; p = po->po_owner; mycred = td->td_ucred; PROC_LOCK(p); ownercred = crhold(p->p_ucred); PROC_UNLOCK(p); PMCDBG2(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread); KASSERT(po->po_kthread == curthread->td_proc, ("[pmclog,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__, po, po->po_kthread, curthread->td_proc)); lb = NULL; /* * Loop waiting for I/O requests to be added to the owner * struct's queue. The loop is exited when the log file * is deconfigured. */ mtx_lock(&pmc_kthread_mtx); for (;;) { /* check if we've been asked to exit */ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) break; if (lb == NULL) { /* look for a fresh buffer to write */ mtx_lock_spin(&po->po_mtx); if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) { mtx_unlock_spin(&po->po_mtx); /* No more buffers and shutdown required. */ if (po->po_flags & PMC_PO_SHUTDOWN) break; (void) msleep(po, &pmc_kthread_mtx, PWAIT, "pmcloop", 250); continue; } TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); mtx_unlock_spin(&po->po_mtx); } mtx_unlock(&pmc_kthread_mtx); /* process the request */ PMCDBG3(LOG,WRI,2, "po=%p base=%p ptr=%p", po, lb->plb_base, lb->plb_ptr); /* change our thread's credentials before issuing the I/O */ aiov.iov_base = lb->plb_base; aiov.iov_len = nbytes = lb->plb_ptr - lb->plb_base; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; /* switch thread credentials -- see kern_ktrace.c */ td->td_ucred = ownercred; error = fo_write(po->po_file, &auio, ownercred, 0, td); td->td_ucred = mycred; if (error) { /* XXX some errors are recoverable */ /* send a SIGIO to the owner and exit */ PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); mtx_lock(&pmc_kthread_mtx); po->po_error = error; /* save for flush log */ PMCDBG2(LOG,WRI,2, "po=%p error=%d", po, error); break; } mtx_lock(&pmc_kthread_mtx); /* put the used buffer back into the global pool */ PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); lb = NULL; } wakeup_one(po->po_kthread); po->po_kthread = NULL; mtx_unlock(&pmc_kthread_mtx); /* return the current I/O buffer to the global pool */ if (lb) { PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); } /* * Exit this thread, signalling the waiter */ crfree(ownercred); kproc_exit(0); } /* * Release and log entry and schedule an I/O if needed. */ static void pmclog_release_flags(struct pmc_owner *po, int wakeup) { struct pmclog_buffer *plb; plb = po->po_curbuf[curcpu]; KASSERT(plb->plb_ptr >= plb->plb_base, ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, po, plb->plb_ptr, plb->plb_base)); KASSERT(plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, po, plb->plb_ptr, plb->plb_fence)); /* schedule an I/O if we've filled a buffer */ if (plb->plb_ptr >= plb->plb_fence) pmclog_schedule_io(po, wakeup); PMCDBG1(LOG,REL,1, "po=%p", po); } static void pmclog_release(struct pmc_owner *po) { pmclog_release_flags(po, 1); } /* * Attempt to reserve 'length' bytes of space in an owner's log * buffer. The function returns a pointer to 'length' bytes of space * if there was enough space or returns NULL if no space was * available. Non-null returns do so with the po mutex locked. The * caller must invoke pmclog_release() on the pmc owner structure * when done. */ static uint32_t * pmclog_reserve(struct pmc_owner *po, int length) { uintptr_t newptr, oldptr; uint32_t *lh; struct timespec ts; struct pmclog_buffer *plb, **pplb; PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length); KASSERT(length % sizeof(uint32_t) == 0, ("[pmclog,%d] length not a multiple of word size", __LINE__)); /* No more data when shutdown in progress. */ if (po->po_flags & PMC_PO_SHUTDOWN) return (NULL); pplb = &po->po_curbuf[curcpu]; if (*pplb == NULL && pmclog_get_buffer(po) != 0) goto fail; KASSERT(*pplb != NULL, ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); plb = *pplb; KASSERT(plb->plb_ptr >= plb->plb_base && plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); oldptr = (uintptr_t) plb->plb_ptr; newptr = oldptr + length; KASSERT(oldptr != (uintptr_t) NULL, ("[pmclog,%d] po=%p Null log buffer pointer", __LINE__, po)); /* * If we have space in the current buffer, return a pointer to * available space with the PO structure locked. */ if (newptr <= (uintptr_t) plb->plb_fence) { plb->plb_ptr = (char *) newptr; goto done; } /* * Otherwise, schedule the current buffer for output and get a * fresh buffer. */ pmclog_schedule_io(po, 0); if (pmclog_get_buffer(po) != 0) goto fail; plb = *pplb; KASSERT(plb != NULL, ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); KASSERT(plb->plb_ptr != NULL, ("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__)); KASSERT(plb->plb_ptr == plb->plb_base && plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", __LINE__, po, plb->plb_ptr, plb->plb_base, plb->plb_fence)); oldptr = (uintptr_t) plb->plb_ptr; done: lh = (uint32_t *) oldptr; lh++; /* skip header */ getnanotime(&ts); /* fill in the timestamp */ *lh++ = ts.tv_sec & 0xFFFFFFFF; *lh++ = ts.tv_nsec & 0xFFFFFFF; return ((uint32_t *) oldptr); fail: return (NULL); } /* * Schedule an I/O. * * Transfer the current buffer to the helper kthread. */ static void pmclog_schedule_io(struct pmc_owner *po, int wakeup) { struct pmclog_buffer *plb; plb = po->po_curbuf[curcpu]; po->po_curbuf[curcpu] = NULL; KASSERT(plb != NULL, ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po)); KASSERT(plb->plb_ptr >= plb->plb_base, ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, po, plb->plb_ptr, plb->plb_base)); KASSERT(plb->plb_ptr <= plb->plb_fence, ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, po, plb->plb_ptr, plb->plb_fence)); PMCDBG1(LOG,SIO, 1, "po=%p", po); /* * Add the current buffer to the tail of the buffer list and * wakeup the helper. */ mtx_lock_spin(&po->po_mtx); TAILQ_INSERT_TAIL(&po->po_logbuffers, plb, plb_next); mtx_unlock_spin(&po->po_mtx); if (wakeup) wakeup_one(po); } /* * Stop the helper kthread. */ static void pmclog_stop_kthread(struct pmc_owner *po) { mtx_lock(&pmc_kthread_mtx); po->po_flags &= ~PMC_PO_OWNS_LOGFILE; if (po->po_kthread != NULL) { PROC_LOCK(po->po_kthread); kern_psignal(po->po_kthread, SIGHUP); PROC_UNLOCK(po->po_kthread); } wakeup_one(po); while (po->po_kthread) msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmckstp", 0); mtx_unlock(&pmc_kthread_mtx); } /* * Public functions */ /* * Configure a log file for pmc owner 'po'. * * Parameter 'logfd' is a file handle referencing an open file in the * owner process. This file needs to have been opened for writing. */ int pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd) { struct proc *p; int error; sx_assert(&pmc_sx, SA_XLOCKED); PMCDBG2(LOG,CFG,1, "config po=%p logfd=%d", po, logfd); p = po->po_owner; /* return EBUSY if a log file was already present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) return (EBUSY); KASSERT(po->po_file == NULL, ("[pmclog,%d] po=%p file (%p) already present", __LINE__, po, po->po_file)); /* get a reference to the file state */ error = fget_write(curthread, logfd, &cap_write_rights, &po->po_file); if (error) goto error; /* mark process as owning a log file */ po->po_flags |= PMC_PO_OWNS_LOGFILE; /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); /* create a log initialization entry */ PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE, sizeof(struct pmclog_initialize)); PMCLOG_EMIT32(PMC_VERSION); PMCLOG_EMIT32(md->pmd_cputype); + PMCLOG_EMITSTRING(pmc_cpuid, PMC_CPUID_LEN); PMCLOG_DESPATCH_SYNC(po); return (0); error: KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not " "stopped", __LINE__, po)); if (po->po_file) (void) fdrop(po->po_file, curthread); po->po_file = NULL; /* clear file and error state */ po->po_error = 0; po->po_flags &= ~PMC_PO_OWNS_LOGFILE; return (error); } /* * De-configure a log file. This will throw away any buffers queued * for this owner process. */ int pmclog_deconfigure_log(struct pmc_owner *po) { int error; struct pmclog_buffer *lb; PMCDBG1(LOG,CFG,1, "de-config po=%p", po); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EINVAL); KASSERT(po->po_sscount == 0, ("[pmclog,%d] po=%p still owning SS PMCs", __LINE__, po)); KASSERT(po->po_file != NULL, ("[pmclog,%d] po=%p no log file", __LINE__, po)); /* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */ pmclog_stop_kthread(po); KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not stopped", __LINE__, po)); /* return all queued log buffers to the global pool */ while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) { TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); } for (int i = 0; i < mp_ncpus; i++) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); /* return the 'current' buffer to the global pool */ if ((lb = po->po_curbuf[curcpu]) != NULL) { PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); pmc_plb_rele(lb); } } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); /* drop a reference to the fd */ if (po->po_file != NULL) { error = fdrop(po->po_file, curthread); po->po_file = NULL; } else error = 0; po->po_error = 0; return (error); } /* * Flush a process' log buffer. */ int pmclog_flush(struct pmc_owner *po) { int error; PMCDBG1(LOG,FLS,1, "po=%p", po); /* * If there is a pending error recorded by the logger thread, * return that. */ if (po->po_error) return (po->po_error); error = 0; /* * Check that we do have an active log file. */ mtx_lock(&pmc_kthread_mtx); if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; goto error; } pmclog_schedule_all(po); error: mtx_unlock(&pmc_kthread_mtx); return (error); } static void pmclog_schedule_one_cond(void *arg) { struct pmc_owner *po = arg; struct pmclog_buffer *plb; int cpu; spinlock_enter(); cpu = curcpu; /* tell hardclock not to run again */ if (PMC_CPU_HAS_SAMPLES(cpu)) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); pmc_flush_samples(cpu); plb = po->po_curbuf[cpu]; if (plb && plb->plb_ptr != plb->plb_base) pmclog_schedule_io(po, 1); spinlock_exit(); } static void pmclog_schedule_all(struct pmc_owner *po) { /* * Schedule the current buffer if any and not empty. */ for (int i = 0; i < mp_ncpus; i++) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); pmclog_schedule_one_cond(po); } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } int pmclog_close(struct pmc_owner *po) { PMCDBG1(LOG,CLO,1, "po=%p", po); pmclog_process_closelog(po); mtx_lock(&pmc_kthread_mtx); /* * Initiate shutdown: no new data queued, * thread will close file on last block. */ po->po_flags |= PMC_PO_SHUTDOWN; /* give time for all to see */ DELAY(50); /* * Schedule the current buffer. */ pmclog_schedule_all(po); wakeup_one(po); mtx_unlock(&pmc_kthread_mtx); return (0); } void pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps) { int n, recordlen; uint32_t flags; struct pmc_owner *po; PMCDBG3(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid, ps->ps_nsamples); recordlen = offsetof(struct pmclog_callchain, pl_pc) + ps->ps_nsamples * sizeof(uintfptr_t); po = pm->pm_owner; flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags); PMCLOG_RESERVE_SAFE(po, CALLCHAIN, recordlen); PMCLOG_EMIT32(ps->ps_pid); PMCLOG_EMIT32(ps->ps_tid); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(flags); /* unused for now */ PMCLOG_EMIT32(0); for (n = 0; n < ps->ps_nsamples; n++) PMCLOG_EMITADDR(ps->ps_pc[n]); PMCLOG_DESPATCH_SAFE(po); } void pmclog_process_closelog(struct pmc_owner *po) { PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog)); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_dropnotify(struct pmc_owner *po) { PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify)); PMCLOG_DESPATCH(po); } void pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start, const char *path) { int pathlen, recordlen; KASSERT(path != NULL, ("[pmclog,%d] map-in, null path", __LINE__)); pathlen = strlen(path) + 1; /* #bytes for path name */ recordlen = offsetof(struct pmclog_map_in, pl_pathname) + pathlen; PMCLOG_RESERVE(po, MAP_IN, recordlen); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(start); PMCLOG_EMITSTRING(path,pathlen); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start, uintfptr_t end) { KASSERT(start <= end, ("[pmclog,%d] start > end", __LINE__)); PMCLOG_RESERVE(po, MAP_OUT, sizeof(struct pmclog_map_out)); PMCLOG_EMIT32(pid); PMCLOG_EMITADDR(start); PMCLOG_EMITADDR(end); PMCLOG_DESPATCH(po); } void pmclog_process_pmcallocate(struct pmc *pm) { struct pmc_owner *po; struct pmc_soft *ps; po = pm->pm_owner; PMCDBG1(LOG,ALL,1, "pm=%p", pm); if (PMC_TO_CLASS(pm) == PMC_CLASS_SOFT) { PMCLOG_RESERVE(po, PMCALLOCATEDYN, sizeof(struct pmclog_pmcallocatedyn)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pm->pm_event); PMCLOG_EMIT32(pm->pm_flags); ps = pmc_soft_ev_acquire(pm->pm_event); if (ps != NULL) PMCLOG_EMITSTRING(ps->ps_ev.pm_ev_name,PMC_NAME_MAX); else PMCLOG_EMITNULLSTRING(PMC_NAME_MAX); pmc_soft_ev_release(ps); PMCLOG_DESPATCH_SYNC(po); } else { PMCLOG_RESERVE(po, PMCALLOCATE, sizeof(struct pmclog_pmcallocate)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pm->pm_event); PMCLOG_EMIT32(pm->pm_flags); PMCLOG_DESPATCH_SYNC(po); } } void pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path) { int pathlen, recordlen; struct pmc_owner *po; PMCDBG2(LOG,ATT,1,"pm=%p pid=%d", pm, pid); po = pm->pm_owner; pathlen = strlen(path) + 1; /* #bytes for the string */ recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen; PMCLOG_RESERVE(po, PMCATTACH, recordlen); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pid); PMCLOG_EMIT32(0); PMCLOG_EMITSTRING(path, pathlen); PMCLOG_DESPATCH_SYNC(po); } void pmclog_process_pmcdetach(struct pmc *pm, pid_t pid) { struct pmc_owner *po; PMCDBG2(LOG,ATT,1,"!pm=%p pid=%d", pm, pid); po = pm->pm_owner; PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pid); PMCLOG_DESPATCH_SYNC(po); } /* * Log a context switch event to the log file. */ void pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v, struct thread *td) { struct pmc_owner *po; KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW, ("[pmclog,%d] log-process-csw called gratuitously", __LINE__)); PMCDBG3(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid, v); po = pm->pm_owner; PMCLOG_RESERVE_SAFE(po, PROCCSW, sizeof(struct pmclog_proccsw)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT64(v); PMCLOG_EMIT32(pp->pp_proc->p_pid); PMCLOG_EMIT32(td->td_tid); PMCLOG_DESPATCH_SCHED_LOCK(po); } void pmclog_process_procexec(struct pmc_owner *po, pmc_id_t pmid, pid_t pid, uintfptr_t startaddr, char *path) { int pathlen, recordlen; PMCDBG3(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path); pathlen = strlen(path) + 1; /* #bytes for the path */ recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen; PMCLOG_RESERVE(po, PROCEXEC, recordlen); PMCLOG_EMIT32(pid); PMCLOG_EMIT32(pmid); PMCLOG_EMIT32(0); PMCLOG_EMITADDR(startaddr); PMCLOG_EMITSTRING(path,pathlen); PMCLOG_DESPATCH(po); } /* * Log a process exit event (and accumulated pmc value) to the log file. */ void pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_owner *po; ri = PMC_TO_ROWINDEX(pm); PMCDBG3(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid, pp->pp_pmcs[ri].pp_pmcval); po = pm->pm_owner; PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit)); PMCLOG_EMIT32(pm->pm_id); PMCLOG_EMIT32(pp->pp_proc->p_pid); PMCLOG_EMIT32(0); PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval); PMCLOG_DESPATCH(po); } /* * Log a fork event. */ void pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid) { PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork)); PMCLOG_EMIT32(oldpid); PMCLOG_EMIT32(newpid); PMCLOG_DESPATCH(po); } /* * Log a process exit event of the form suitable for system-wide PMCs. */ void pmclog_process_sysexit(struct pmc_owner *po, pid_t pid) { PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit)); PMCLOG_EMIT32(pid); PMCLOG_DESPATCH(po); } /* * Write a user log entry. */ int pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl) { int error; PMCDBG2(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata); error = 0; PMCLOG_RESERVE_WITH_ERROR(po, USERDATA, sizeof(struct pmclog_userdata)); PMCLOG_EMIT32(wl->pm_userdata); PMCLOG_DESPATCH(po); error: return (error); } /* * Initialization. * * Create a pool of log buffers and initialize mutexes. */ void pmclog_initialize() { int domain; struct pmclog_buffer *plb; if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) { (void) printf("hwpmc: tunable logbuffersize=%d must be " "greater than zero and less than or equal to 16MB.\n", pmclog_buffer_size); pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; } if (pmc_nlogbuffers_pcpu <= 0) { (void) printf("hwpmc: tunable nlogbuffers=%d must be greater " "than zero.\n", pmc_nlogbuffers_pcpu); pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; } if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) { (void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n", pmc_nlogbuffers_pcpu*pmclog_buffer_size); pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; } for (domain = 0; domain < NDOMAINS; domain++) { int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus; int total = ncpus*pmc_nlogbuffers_pcpu; plb = malloc_domain(sizeof(struct pmclog_buffer)*total, M_PMC, domain, M_WAITOK|M_ZERO); pmc_dom_hdrs[domain]->pdbh_plbs = plb; for (int i = 0; i < total; i++, plb++) { void *buf; buf = malloc_domain(1024 * pmclog_buffer_size, M_PMC, domain, M_WAITOK|M_ZERO); PMCLOG_INIT_BUFFER_DESCRIPTOR(plb, buf, domain); pmc_plb_rele_unlocked(plb); } } mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF); } /* * Shutdown logging. * * Destroy mutexes and release memory back the to free pool. */ void pmclog_shutdown() { struct pmclog_buffer *plb; int domain; mtx_destroy(&pmc_kthread_mtx); for (domain = 0; domain < NDOMAINS; domain++) { while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) { TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); free(plb->plb_base, M_PMC); } free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC); } } Index: head/sys/i386/include/pmc_mdep.h =================================================================== --- head/sys/i386/include/pmc_mdep.h (revision 334595) +++ head/sys/i386/include/pmc_mdep.h (revision 334596) @@ -1,167 +1,166 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2005,2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PMC_MDEP_H #define _MACHINE_PMC_MDEP_H 1 #ifdef _KERNEL struct pmc_mdep; #endif /* * On the i386 platform we support the following PMCs. * * TSC The timestamp counter * K7 AMD Athlon XP/MP and other 32 bit processors. * K8 AMD Athlon64 and Opteron PMCs in 32 bit mode. * PIV Intel P4/HTT and P4/EMT64 * PPRO Intel Pentium Pro, Pentium-II, Pentium-III, Celeron and * Pentium-M processors * PENTIUM Intel Pentium MMX. * IAP Intel Core/Core2/Atom programmable PMCs. * IAF Intel fixed-function PMCs. * UCP Intel Uncore programmable PMCs. * UCF Intel Uncore fixed-function PMCs. */ #include /* K7 and K8 */ #include #include #include /* * Intel processors implementing V2 and later of the Intel performance * measurement architecture have PMCs of the following classes: TSC, * IAF, IAP, UCF and UCP. */ #define PMC_MDEP_CLASS_INDEX_TSC 1 #define PMC_MDEP_CLASS_INDEX_K7 2 #define PMC_MDEP_CLASS_INDEX_K8 2 #define PMC_MDEP_CLASS_INDEX_IAP 2 #define PMC_MDEP_CLASS_INDEX_IAF 3 #define PMC_MDEP_CLASS_INDEX_UCP 4 #define PMC_MDEP_CLASS_INDEX_UCF 5 /* * Architecture specific extensions to structures. */ union pmc_md_op_pmcallocate { struct pmc_md_amd_op_pmcallocate pm_amd; - struct pmc_md_iaf_op_pmcallocate pm_iaf; struct pmc_md_iap_op_pmcallocate pm_iap; struct pmc_md_ucf_op_pmcallocate pm_ucf; struct pmc_md_ucp_op_pmcallocate pm_ucp; uint64_t __pad[4]; }; /* Logging */ #define PMCLOG_READADDR PMCLOG_READ32 #define PMCLOG_EMITADDR PMCLOG_EMIT32 #ifdef _KERNEL /* MD extension for 'struct pmc' */ union pmc_md_pmc { struct pmc_md_amd_pmc pm_amd; struct pmc_md_iaf_pmc pm_iaf; struct pmc_md_iap_pmc pm_iap; struct pmc_md_ucf_pmc pm_ucf; struct pmc_md_ucp_pmc pm_ucp; }; struct pmc; struct pmc_mdep; #define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_eip) #define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_ebp) /* * The layout of the stack frame on entry into the NMI handler depends on * whether a privilege level change (and consequent stack switch) was * required for entry. * * When processing an interrupt when in user mode, the processor switches * stacks, and saves the user mode stack pointer on the kernel stack. The * user mode stack pointer is then available to the interrupt handler * at frame->tf_esp. * * When processing an interrupt while in kernel mode, the processor * continues to use the existing (kernel) stack. Therefore we determine * the stack pointer for the interrupted kernel procedure by adding an * offset to the current frame pointer. */ #define PMC_TRAPFRAME_TO_USER_SP(TF) ((TF)->tf_esp) #define PMC_TRAPFRAME_TO_KERNEL_SP(TF) ((uintptr_t) &((TF)->tf_esp)) #define PMC_IN_KERNEL_STACK(S,START,END) \ ((S) >= (START) && (S) < (END)) #define PMC_IN_KERNEL(va) INKERNEL(va) #define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS) #define PMC_IN_TRAP_HANDLER(PC) \ ((PC) >= (uintptr_t)start_exceptions + setidt_disp && \ (PC) < (uintptr_t) end_exceptions + setidt_disp) #define PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I) \ (((I) & 0x00ffffff) == 0xe58955) /* pushl %ebp; movl %esp,%ebp */ #define PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I) \ (((I) & 0x0000ffff) == 0xe589) /* movl %esp,%ebp */ #define PMC_AT_FUNCTION_EPILOGUE_RET(I) \ (((I) & 0xFF) == 0xC3) /* ret */ /* Build a fake kernel trapframe from current instruction pointer. */ #define PMC_FAKE_TRAPFRAME(TF) \ do { \ (TF)->tf_cs = 0; (TF)->tf_eflags = 0; \ __asm __volatile("movl %%ebp,%0" : "=r" ((TF)->tf_ebp)); \ __asm __volatile("movl %%esp,%0" : "=r" ((TF)->tf_esp)); \ __asm __volatile("call 1f \n\t1: pop %0" : "=r"((TF)->tf_eip)); \ } while (0) /* * Prototypes */ void start_exceptions(void), end_exceptions(void); struct pmc_mdep *pmc_amd_initialize(void); void pmc_amd_finalize(struct pmc_mdep *_md); struct pmc_mdep *pmc_intel_initialize(void); void pmc_intel_finalize(struct pmc_mdep *_md); #endif /* _KERNEL */ #endif /* _MACHINE_PMC_MDEP_H */ Index: head/sys/sys/pmc.h =================================================================== --- head/sys/sys/pmc.h (revision 334595) +++ head/sys/sys/pmc.h (revision 334596) @@ -1,1219 +1,1220 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #include #include #ifdef _KERNEL #include #include #endif #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ -#define PMC_VERSION_MAJOR 0x05 +#define PMC_VERSION_MAJOR 0x06 #define PMC_VERSION_MINOR 0x01 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) +#define PMC_CPUID_LEN 64 +/* cpu model name for pmu lookup */ +extern char pmc_cpuid[PMC_CPUID_LEN]; + /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. */ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_P5, 0x80, "Intel Pentium") \ __PMC_CPU(INTEL_P6, 0x81, "Intel Pentium Pro") \ __PMC_CPU(INTEL_CL, 0x82, "Intel Celeron") \ __PMC_CPU(INTEL_PII, 0x83, "Intel Pentium II") \ __PMC_CPU(INTEL_PIII, 0x84, "Intel Pentium III") \ __PMC_CPU(INTEL_PM, 0x85, "Intel Pentium M") \ __PMC_CPU(INTEL_PIV, 0x86, "Intel Pentium IV") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \ __PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \ __PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ __PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_GENERIC /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K7, 0x01, "AMD K7 performance counters") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(P5, 0x03, "Intel Pentium counters") \ __PMC_CLASS(P6, 0x04, "Intel Pentium Pro counters") \ __PMC_CLASS(P4, 0x05, "Intel Pentium-IV counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(MIPS24K, 0x0B, "MIPS 24K") \ __PMC_CLASS(OCTEON, 0x0C, "Cavium Octeon") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ __PMC_CLASS(E500, 0x13, "Freescale e500 class") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_E500 /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * a interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system. */ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparison sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_CASCADE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. */ -#define PMC_F_FORCE 0x00000001 /*OP ADMIN force operation */ +#define PMC_F_UNUSED1 0x00000001 /* unused */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ -#define PMC_F_KGMON 0x00000040 /*OP ALLOCATE kgmon(8) profiling */ + /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ #define PMC_F_USERCALLCHAIN 0x00000100 /*OP ALLOCATE use userspace stack */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * - * +--------+----------+-----------+-----------+ - * | CPU | PMC MODE | PMC CLASS | ROW INDEX | - * +--------+----------+-----------+-----------+ + * +-----------------------+-------+-----------+ + * | CPU | PMC MODE | CLASS | ROW INDEX | + * +-----------------------+-------+-----------+ * - * where each field is 8 bits wide. Field 'CPU' is set to the - * requested CPU for system-wide PMCs or PMC_CPU_ANY for process-mode - * PMCs. Field 'PMC MODE' is the allocated PMC mode. Field 'PMC - * CLASS' is the class of the PMC. Field 'ROW INDEX' is the row index - * for the PMC. + * where CPU is 12 bits, MODE 8, CLASS 4, and ROW INDEX 8 Field 'CPU' + * is set to the requested CPU for system-wide PMCs or PMC_CPU_ANY for + * process-mode PMCs. Field 'PMC MODE' is the allocated PMC mode. + * Field 'PMC CLASS' is the class of the PMC. Field 'ROW INDEX' is the + * row index for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) -#define PMC_ID_TO_CLASS(ID) (((ID) & 0xFF00) >> 8) -#define PMC_ID_TO_MODE(ID) (((ID) & 0xFF0000) >> 16) -#define PMC_ID_TO_CPU(ID) (((ID) & 0xFF000000) >> 24) +#define PMC_ID_TO_CLASS(ID) (((ID) & 0xF00) >> 8) +#define PMC_ID_TO_MODE(ID) (((ID) & 0xFF000) >> 12) +#define PMC_ID_TO_CPU(ID) (((ID) & 0xFFF00000) >> 20) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ - ((((CPU) & 0xFF) << 24) | (((MODE) & 0xFF) << 16) | \ - (((CLASS) & 0xFF) << 8) | ((ROWINDEX) & 0xFF)) + ((((CPU) & 0xFFF) << 20) | (((MODE) & 0xFF) << 12) | \ + (((CLASS) & 0xF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ #ifdef _KERNEL struct pmc_driverstats { counter_u64_t pm_intr_ignored; /* #interrupts ignored */ counter_u64_t pm_intr_processed; /* #interrupts processed */ counter_u64_t pm_intr_bufferfull; /* #interrupts with ENOSPC */ counter_u64_t pm_syscalls; /* #syscalls */ counter_u64_t pm_syscall_errors; /* #syscalls with errors */ counter_u64_t pm_buffer_requests; /* #buffer requests */ counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */ counter_u64_t pm_log_sweeps; /* #sample buffer processing passes */ counter_u64_t pm_merges; /* merged k+u */ counter_u64_t pm_overwrites; /* UR overwrites */ }; #endif struct pmc_op_getdriverstats { unsigned int pm_intr_ignored; /* #interrupts ignored */ unsigned int pm_intr_processed; /* #interrupts processed */ unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ unsigned int pm_syscalls; /* #syscalls */ unsigned int pm_syscall_errors; /* #syscalls with errors */ unsigned int pm_buffer_requests; /* #buffer requests */ unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ unsigned int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address associated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. */ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 256 #define PMC_NLOGBUFFERS_PCPU 32 #define PMC_NSAMPLES 256 #define PMC_CALLCHAIN_DEPTH 128 #define PMC_THREADLIST_MAX 128 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) * (g) - global_epoch_preempt (epoch) * (p) - pmc_sx (sx) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific s1tuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * get descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc_pcpu_state { uint8_t pps_stalled; uint8_t pps_cpustate; } __aligned(CACHE_LINE_SIZE); struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; struct pmc_pcpu_state *pm_pcpu_state; volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ counter_u64_t pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ uint32_t pm_overflowcnt; /* count overflow interrupts */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC. */ pmc_id_t pm_id; /* allocated PMC id */ enum pmc_class pm_class; /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_threadpmcstate * * Record per-PMC, per-thread state. */ struct pmc_threadpmcstate { pmc_value_t pt_pmcval; /* per-thread reload count */ }; /* * struct pmc_thread * * Record a 'target' thread being profiled. */ struct pmc_thread { LIST_ENTRY(pmc_thread) pt_next; /* linked list */ struct thread *pt_td; /* target thread */ struct pmc_threadpmcstate pt_pmcs[]; /* per-PMC state */ }; /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ LIST_HEAD(,pmc_thread) pp_tds; /* list of threads */ struct mtx *pp_tdslock; /* lock on pp_tds thread list */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target process */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ CK_LIST_ENTRY(pmc_owner) po_ssnext; /* (g/p) list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ struct pmclog_buffer *po_curbuf[MAXCPU]; /* current log buffer */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). * */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint16_t ps_nsamples_actual; uint16_t ps_cpu; /* cpu number */ uint16_t ps_flags; /* other flags */ lwpid_t ps_tid; /* thread id */ pid_t ps_pid; /* process PID or -1 */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF) struct pmc_samplebuffer { struct pmc_sample * volatile ps_read; /* read pointer */ struct pmc_sample * volatile ps_write; /* write pointer */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample *ps_fence; /* one beyond ps_samples[] */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[3]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. */ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri); int (*pcd_stop_pmc)(int _cpu, int _ri); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* per-cpu initialization and finalization */ int (*pmd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pmd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(int _cpu, struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. */ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_driverstats pmc_stats; - -/* cpu model name for pmu lookup */ -extern char pmc_cpuid[64]; #if defined(HWPMC_DEBUG) #include /* debug flags, major flag groups */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... */ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _cpu, int _ring, struct pmc *_pm, struct trapframe *_tf, int _inuserspace); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); void pmc_flush_samples(int cpu); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */ Index: head/sys/sys/pmclog.h =================================================================== --- head/sys/sys/pmclog.h (revision 334595) +++ head/sys/sys/pmclog.h (revision 334596) @@ -1,290 +1,291 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2007, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMCLOG_H_ #define _SYS_PMCLOG_H_ #include enum pmclog_type { /* V1 ABI */ PMCLOG_TYPE_CLOSELOG = 1, PMCLOG_TYPE_DROPNOTIFY = 2, PMCLOG_TYPE_INITIALIZE = 3, PMCLOG_TYPE_MAPPINGCHANGE = 4, /* unused in v1 */ PMCLOG_TYPE_PMCALLOCATE = 5, PMCLOG_TYPE_PMCATTACH = 6, PMCLOG_TYPE_PMCDETACH = 7, PMCLOG_TYPE_PROCCSW = 8, PMCLOG_TYPE_PROCEXEC = 9, PMCLOG_TYPE_PROCEXIT = 10, PMCLOG_TYPE_PROCFORK = 11, PMCLOG_TYPE_SYSEXIT = 12, PMCLOG_TYPE_USERDATA = 13, /* * V2 ABI * * The MAP_{IN,OUT} event types obsolete the MAPPING_CHANGE * event type. The CALLCHAIN event type obsoletes the * PCSAMPLE event type. */ PMCLOG_TYPE_MAP_IN = 14, PMCLOG_TYPE_MAP_OUT = 15, PMCLOG_TYPE_CALLCHAIN = 16, /* * V3 ABI * * New variant of PMCLOG_TYPE_PMCALLOCATE for dynamic event. */ PMCLOG_TYPE_PMCALLOCATEDYN = 17 }; /* * A log entry descriptor comprises of a 32 bit header and a 64 bit * time stamp followed by as many 32 bit words are required to record * the event. * * Header field format: * * 31 24 16 0 * +------------+------------+-----------------------------------+ * | MAGIC | TYPE | LENGTH | * +------------+------------+-----------------------------------+ * * MAGIC is the constant PMCLOG_HEADER_MAGIC. * TYPE contains a value of type enum pmclog_type. * LENGTH contains the length of the event record, in bytes. */ #define PMCLOG_ENTRY_HEADER \ uint32_t pl_header; \ uint32_t pl_ts_sec; \ uint32_t pl_ts_nsec; /* * The following structures are used to describe the size of each kind * of log entry to sizeof(). To keep the compiler from adding * padding, the fields of each structure are aligned to their natural * boundaries, and the structures are marked as 'packed'. * * The actual reading and writing of the log file is always in terms * of 4 byte quantities. */ struct pmclog_callchain { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uint32_t pl_tid; uint32_t pl_pmcid; uint32_t pl_cpuflags; uint32_t pl_cpuflags2; /* 8 byte aligned */ uintptr_t pl_pc[PMC_CALLCHAIN_DEPTH_MAX]; } __packed; #define PMC_CALLCHAIN_CPUFLAGS_TO_CPU(CF) (((CF) >> 16) & 0xFFFF) #define PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(CF) ((CF) & PMC_CC_F_USERSPACE) #define PMC_CALLCHAIN_TO_CPUFLAGS(CPU,FLAGS) \ (((CPU) << 16) | ((FLAGS) & 0xFFFF)) struct pmclog_closelog { PMCLOG_ENTRY_HEADER uint32_t pl_pad; }; struct pmclog_dropnotify { PMCLOG_ENTRY_HEADER uint32_t pl_pad; }; struct pmclog_initialize { PMCLOG_ENTRY_HEADER uint32_t pl_version; /* driver version */ uint32_t pl_cpu; /* enum pmc_cputype */ uint32_t pl_pad; + char pl_cpuid[PMC_CPUID_LEN]; } __packed; struct pmclog_map_in { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uintfptr_t pl_start; /* 8 byte aligned */ char pl_pathname[PATH_MAX]; } __packed; struct pmclog_map_out { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uintfptr_t pl_start; /* 8 byte aligned */ uintfptr_t pl_end; } __packed; struct pmclog_pmcallocate { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_event; uint32_t pl_flags; } __packed; struct pmclog_pmcattach { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_pid; uint32_t pl_pad; char pl_pathname[PATH_MAX]; } __packed; struct pmclog_pmcdetach { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_pid; uint32_t pl_pad; } __packed; struct pmclog_proccsw { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint64_t pl_value; /* keep 8 byte aligned */ uint32_t pl_pid; uint32_t pl_tid; } __packed; struct pmclog_procexec { PMCLOG_ENTRY_HEADER uint32_t pl_pid; uint32_t pl_pmcid; uint32_t pl_pad; uintfptr_t pl_start; /* keep 8 byte aligned */ char pl_pathname[PATH_MAX]; } __packed; struct pmclog_procexit { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_pid; uint32_t pl_pad; uint64_t pl_value; /* keep 8 byte aligned */ } __packed; struct pmclog_procfork { PMCLOG_ENTRY_HEADER uint32_t pl_oldpid; uint32_t pl_newpid; uint32_t pl_pad; } __packed; struct pmclog_sysexit { PMCLOG_ENTRY_HEADER uint32_t pl_pid; } __packed; struct pmclog_userdata { PMCLOG_ENTRY_HEADER uint32_t pl_userdata; } __packed; struct pmclog_pmcallocatedyn { PMCLOG_ENTRY_HEADER uint32_t pl_pmcid; uint32_t pl_event; uint32_t pl_flags; char pl_evname[PMC_NAME_MAX]; } __packed; union pmclog_entry { /* only used to size scratch areas */ struct pmclog_callchain pl_cc; struct pmclog_closelog pl_cl; struct pmclog_dropnotify pl_dn; struct pmclog_initialize pl_i; struct pmclog_map_in pl_mi; struct pmclog_map_out pl_mo; struct pmclog_pmcallocate pl_a; struct pmclog_pmcallocatedyn pl_ad; struct pmclog_pmcattach pl_t; struct pmclog_pmcdetach pl_d; struct pmclog_proccsw pl_c; struct pmclog_procexec pl_x; struct pmclog_procexit pl_e; struct pmclog_procfork pl_f; struct pmclog_sysexit pl_se; struct pmclog_userdata pl_u; }; #define PMCLOG_HEADER_MAGIC 0xEEU #define PMCLOG_HEADER_TO_LENGTH(H) \ ((H) & 0x0000FFFF) #define PMCLOG_HEADER_TO_TYPE(H) \ (((H) & 0x00FF0000) >> 16) #define PMCLOG_HEADER_TO_MAGIC(H) \ (((H) & 0xFF000000) >> 24) #define PMCLOG_HEADER_CHECK_MAGIC(H) \ (PMCLOG_HEADER_TO_MAGIC(H) == PMCLOG_HEADER_MAGIC) #ifdef _KERNEL /* * Prototypes */ int pmclog_configure_log(struct pmc_mdep *_md, struct pmc_owner *_po, int _logfd); int pmclog_deconfigure_log(struct pmc_owner *_po); int pmclog_flush(struct pmc_owner *_po); int pmclog_close(struct pmc_owner *_po); void pmclog_initialize(void); int pmclog_proc_create(struct thread *td, void **handlep); void pmclog_proc_ignite(void *handle, struct pmc_owner *po); void pmclog_process_callchain(struct pmc *_pm, struct pmc_sample *_ps); void pmclog_process_closelog(struct pmc_owner *po); void pmclog_process_dropnotify(struct pmc_owner *po); void pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start, const char *path); void pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start, uintfptr_t end); void pmclog_process_pmcallocate(struct pmc *_pm); void pmclog_process_pmcattach(struct pmc *_pm, pid_t _pid, char *_path); void pmclog_process_pmcdetach(struct pmc *_pm, pid_t _pid); void pmclog_process_proccsw(struct pmc *_pm, struct pmc_process *_pp, pmc_value_t _v, struct thread *); void pmclog_process_procexec(struct pmc_owner *_po, pmc_id_t _pmid, pid_t _pid, uintfptr_t _startaddr, char *_path); void pmclog_process_procexit(struct pmc *_pm, struct pmc_process *_pp); void pmclog_process_procfork(struct pmc_owner *_po, pid_t _oldpid, pid_t _newpid); void pmclog_process_sysexit(struct pmc_owner *_po, pid_t _pid); int pmclog_process_userlog(struct pmc_owner *_po, struct pmc_op_writelog *_wl); void pmclog_shutdown(void); #endif /* _KERNEL */ #endif /* _SYS_PMCLOG_H_ */