Index: user/fabient/svctest/usr.sbin/pmcstat/Makefile =================================================================== --- user/fabient/svctest/usr.sbin/pmcstat/Makefile (revision 197318) +++ user/fabient/svctest/usr.sbin/pmcstat/Makefile (revision 197319) @@ -1,15 +1,15 @@ # # $FreeBSD$ # PROG= pmcstat MAN= pmcstat.8 DPADD= ${LIBELF} ${LIBKVM} ${LIBPMC} ${LIBM} LDADD= -lelf -lkvm -lpmc -lm WARNS?= 6 -SRCS= pmcstat.c pmcstat.h pmcstat_log.c +SRCS= pmcstat.c pmcstat.h pmcstat_log.c pmcstat_logct.c .include Index: user/fabient/svctest/usr.sbin/pmcstat/pmcstat.c =================================================================== --- user/fabient/svctest/usr.sbin/pmcstat/pmcstat.c (revision 197318) +++ user/fabient/svctest/usr.sbin/pmcstat/pmcstat.c (revision 197319) @@ -1,1326 +1,1343 @@ /*- * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pmcstat.h" /* * A given invocation of pmcstat(8) can manage multiple PMCs of both * the system-wide and per-process variety. Each of these could be in * 'counting mode' or in 'sampling mode'. * * For 'counting mode' PMCs, pmcstat(8) will periodically issue a * pmc_read() at the configured time interval and print out the value * of the requested PMCs. * * For 'sampling mode' PMCs it can log to a file for offline analysis, * or can analyse sampling data "on the fly", either by converting * samples to printed textual form or by creating gprof(1) compatible * profiles, one per program executed. When creating gprof(1) * profiles it can optionally merge entries from multiple processes * for a given executable into a single profile file. * * pmcstat(8) can also execute a command line and attach PMCs to the * resulting child process. The protocol used is as follows: * * - parent creates a socketpair for two way communication and * fork()s. * - subsequently: * * /Parent/ /Child/ * * - Wait for childs token. * - Sends token. * - Awaits signal to start. * - Attaches PMCs to the child's pid * and starts them. Sets up * monitoring for the child. 
* - Signals child to start. * - Receives signal, attempts exec(). * * After this point normal processing can happen. */ /* Globals */ int pmcstat_interrupt = 0; int pmcstat_displayheight = DEFAULT_DISPLAY_HEIGHT; int pmcstat_sockpair[NSOCKPAIRFD]; int pmcstat_kq; kvm_t *pmcstat_kvm; struct kinfo_proc *pmcstat_plist; void pmcstat_attach_pmcs(struct pmcstat_args *a) { struct pmcstat_ev *ev; struct pmcstat_target *pt; int count; /* Attach all process PMCs to target processes. */ count = 0; STAILQ_FOREACH(ev, &a->pa_events, ev_next) { if (PMC_IS_SYSTEM_MODE(ev->ev_mode)) continue; SLIST_FOREACH(pt, &a->pa_targets, pt_next) if (pmc_attach(ev->ev_pmcid, pt->pt_pid) == 0) count++; else if (errno != ESRCH) err(EX_OSERR, "ERROR: cannot attach pmc " "\"%s\" to process %d", ev->ev_name, (int) pt->pt_pid); } if (count == 0) errx(EX_DATAERR, "ERROR: No processes were attached to."); } void pmcstat_cleanup(struct pmcstat_args *a) { struct pmcstat_ev *ev, *tmp; /* release allocated PMCs. */ STAILQ_FOREACH_SAFE(ev, &a->pa_events, ev_next, tmp) if (ev->ev_pmcid != PMC_ID_INVALID) { if (pmc_stop(ev->ev_pmcid) < 0) err(EX_OSERR, "ERROR: cannot stop pmc 0x%x " "\"%s\"", ev->ev_pmcid, ev->ev_name); if (pmc_release(ev->ev_pmcid) < 0) err(EX_OSERR, "ERROR: cannot release pmc " "0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); free(ev->ev_name); free(ev->ev_spec); STAILQ_REMOVE(&a->pa_events, ev, pmcstat_ev, ev_next); free(ev); } /* de-configure the log file if present. 
*/ if (a->pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) (void) pmc_configure_logfile(-1); if (a->pa_logparser) { pmclog_close(a->pa_logparser); a->pa_logparser = NULL; } if (a->pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) pmcstat_shutdown_logging(a); } void pmcstat_clone_event_descriptor(struct pmcstat_args *a, struct pmcstat_ev *ev, uint32_t cpumask) { int cpu; struct pmcstat_ev *ev_clone; while ((cpu = ffs(cpumask)) > 0) { cpu--; if ((ev_clone = malloc(sizeof(*ev_clone))) == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory"); (void) memset(ev_clone, 0, sizeof(*ev_clone)); ev_clone->ev_count = ev->ev_count; ev_clone->ev_cpu = cpu; ev_clone->ev_cumulative = ev->ev_cumulative; ev_clone->ev_flags = ev->ev_flags; ev_clone->ev_mode = ev->ev_mode; ev_clone->ev_name = strdup(ev->ev_name); ev_clone->ev_pmcid = ev->ev_pmcid; ev_clone->ev_saved = ev->ev_saved; ev_clone->ev_spec = strdup(ev->ev_spec); STAILQ_INSERT_TAIL(&a->pa_events, ev_clone, ev_next); cpumask &= ~(1 << cpu); } } void pmcstat_create_process(struct pmcstat_args *a) { char token; pid_t pid; struct kevent kev; struct pmcstat_target *pt; if (socketpair(AF_UNIX, SOCK_STREAM, 0, pmcstat_sockpair) < 0) err(EX_OSERR, "ERROR: cannot create socket pair"); switch (pid = fork()) { case -1: err(EX_OSERR, "ERROR: cannot fork"); /*NOTREACHED*/ case 0: /* child */ (void) close(pmcstat_sockpair[PARENTSOCKET]); /* Write a token to tell our parent we've started executing. */ if (write(pmcstat_sockpair[CHILDSOCKET], "+", 1) != 1) err(EX_OSERR, "ERROR (child): cannot write token"); /* Wait for our parent to signal us to start. 
*/ if (read(pmcstat_sockpair[CHILDSOCKET], &token, 1) < 0) err(EX_OSERR, "ERROR (child): cannot read token"); (void) close(pmcstat_sockpair[CHILDSOCKET]); /* exec() the program requested */ execvp(*a->pa_argv, a->pa_argv); /* and if that fails, notify the parent */ kill(getppid(), SIGCHLD); err(EX_OSERR, "ERROR: execvp \"%s\" failed", *a->pa_argv); /*NOTREACHED*/ default: /* parent */ (void) close(pmcstat_sockpair[CHILDSOCKET]); break; } /* Ask to be notified via a kevent when the target process exits. */ EV_SET(&kev, pid, EVFILT_PROC, EV_ADD|EV_ONESHOT, NOTE_EXIT, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: cannot monitor child process %d", pid); if ((pt = malloc(sizeof(*pt))) == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); pt->pt_pid = pid; SLIST_INSERT_HEAD(&a->pa_targets, pt, pt_next); /* Wait for the child to signal that its ready to go. */ if (read(pmcstat_sockpair[PARENTSOCKET], &token, 1) < 0) err(EX_OSERR, "ERROR (parent): cannot read token"); return; } void pmcstat_find_targets(struct pmcstat_args *a, const char *spec) { int n, nproc, pid, rv; struct pmcstat_target *pt; char errbuf[_POSIX2_LINE_MAX], *end; static struct kinfo_proc *kp; regex_t reg; regmatch_t regmatch; /* First check if we've been given a process id. */ pid = strtol(spec, &end, 0); if (end != spec && pid >= 0) { if ((pt = malloc(sizeof(*pt))) == NULL) goto outofmemory; pt->pt_pid = pid; SLIST_INSERT_HEAD(&a->pa_targets, pt, pt_next); return; } /* Otherwise treat arg as a regular expression naming processes. 
*/ if (pmcstat_kvm == NULL) { if ((pmcstat_kvm = kvm_openfiles(NULL, "/dev/null", NULL, 0, errbuf)) == NULL) err(EX_OSERR, "ERROR: Cannot open kernel \"%s\"", errbuf); if ((pmcstat_plist = kvm_getprocs(pmcstat_kvm, KERN_PROC_PROC, 0, &nproc)) == NULL) err(EX_OSERR, "ERROR: Cannot get process list: %s", kvm_geterr(pmcstat_kvm)); } if ((rv = regcomp(®, spec, REG_EXTENDED|REG_NOSUB)) != 0) { regerror(rv, ®, errbuf, sizeof(errbuf)); err(EX_DATAERR, "ERROR: Failed to compile regex \"%s\": %s", spec, errbuf); } for (n = 0, kp = pmcstat_plist; n < nproc; n++, kp++) { if ((rv = regexec(®, kp->ki_comm, 1, ®match, 0)) == 0) { if ((pt = malloc(sizeof(*pt))) == NULL) goto outofmemory; pt->pt_pid = kp->ki_pid; SLIST_INSERT_HEAD(&a->pa_targets, pt, pt_next); } else if (rv != REG_NOMATCH) { regerror(rv, ®, errbuf, sizeof(errbuf)); errx(EX_SOFTWARE, "ERROR: Regex evalation failed: %s", errbuf); } } regfree(®); return; outofmemory: errx(EX_SOFTWARE, "Out of memory."); /*NOTREACHED*/ } uint32_t pmcstat_get_cpumask(const char *cpuspec) { uint32_t cpumask; int cpu; const char *s; char *end; s = cpuspec; cpumask = 0ULL; do { cpu = strtol(s, &end, 0); if (cpu < 0 || end == s) errx(EX_USAGE, "ERROR: Illegal CPU specification " "\"%s\".", cpuspec); cpumask |= (1 << cpu); s = end + strspn(end, ", \t"); } while (*s); return (cpumask); } void pmcstat_kill_process(struct pmcstat_args *a) { struct pmcstat_target *pt; assert(a->pa_flags & FLAG_HAS_COMMANDLINE); /* * If a command line was specified, it would be the very first * in the list, before any other processes specified by -t. 
*/ pt = SLIST_FIRST(&a->pa_targets); assert(pt != NULL); if (kill(pt->pt_pid, SIGINT) != 0) err(EX_OSERR, "ERROR: cannot signal child process"); } void pmcstat_start_pmcs(struct pmcstat_args *a) { struct pmcstat_ev *ev; STAILQ_FOREACH(ev, &args.pa_events, ev_next) { assert(ev->ev_pmcid != PMC_ID_INVALID); if (pmc_start(ev->ev_pmcid) < 0) { warn("ERROR: Cannot start pmc 0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); pmcstat_cleanup(a); exit(EX_OSERR); } } } void pmcstat_print_headers(struct pmcstat_args *a) { struct pmcstat_ev *ev; int c, w; (void) fprintf(a->pa_printfile, PRINT_HEADER_PREFIX); STAILQ_FOREACH(ev, &a->pa_events, ev_next) { if (PMC_IS_SAMPLING_MODE(ev->ev_mode)) continue; c = PMC_IS_SYSTEM_MODE(ev->ev_mode) ? 's' : 'p'; if (ev->ev_fieldskip != 0) (void) fprintf(a->pa_printfile, "%*s", ev->ev_fieldskip, ""); w = ev->ev_fieldwidth - ev->ev_fieldskip - 2; if (c == 's') (void) fprintf(a->pa_printfile, "s/%02d/%-*s ", ev->ev_cpu, w-3, ev->ev_name); else (void) fprintf(a->pa_printfile, "p/%*s ", w, ev->ev_name); } (void) fflush(a->pa_printfile); } void pmcstat_print_counters(struct pmcstat_args *a) { int extra_width; struct pmcstat_ev *ev; pmc_value_t value; extra_width = sizeof(PRINT_HEADER_PREFIX) - 1; STAILQ_FOREACH(ev, &a->pa_events, ev_next) { /* skip sampling mode counters */ if (PMC_IS_SAMPLING_MODE(ev->ev_mode)) continue; if (pmc_read(ev->ev_pmcid, &value) < 0) err(EX_OSERR, "ERROR: Cannot read pmc " "\"%s\"", ev->ev_name); (void) fprintf(a->pa_printfile, "%*ju ", ev->ev_fieldwidth + extra_width, (uintmax_t) ev->ev_cumulative ? 
value : (value - ev->ev_saved)); if (ev->ev_cumulative == 0) ev->ev_saved = value; extra_width = 0; } (void) fflush(a->pa_printfile); } /* * Print output */ void pmcstat_print_pmcs(struct pmcstat_args *a) { static int linecount = 0; /* check if we need to print a header line */ if (++linecount > pmcstat_displayheight) { (void) fprintf(a->pa_printfile, "\n"); linecount = 1; } if (linecount == 1) pmcstat_print_headers(a); (void) fprintf(a->pa_printfile, "\n"); pmcstat_print_counters(a); return; } /* * Do process profiling * * If a pid was specified, attach each allocated PMC to the target * process. Otherwise, fork a child and attach the PMCs to the child, * and have the child exec() the target program. */ void pmcstat_start_process(void) { /* Signal the child to proceed. */ if (write(pmcstat_sockpair[PARENTSOCKET], "!", 1) != 1) err(EX_OSERR, "ERROR (parent): write of token failed"); (void) close(pmcstat_sockpair[PARENTSOCKET]); } void pmcstat_show_usage(void) { errx(EX_USAGE, "[options] [commandline]\n" "\t Measure process and/or system performance using hardware\n" "\t performance monitoring counters.\n" "\t Options include:\n" "\t -C\t\t (toggle) show cumulative counts\n" "\t -D path\t create profiles in directory \"path\"\n" "\t -E\t\t (toggle) show counts at process exit\n" "\t -G file\t write a system-wide callgraph to \"file\"\n" + "\t -f type\t set output format (calltree|callgraph) for system-wide callgraph\n" "\t -M file\t print executable/gmon file map to \"file\"\n" "\t -N\t\t (toggle) capture callchains\n" "\t -O file\t send log output to \"file\"\n" "\t -P spec\t allocate a process-private sampling PMC\n" "\t -R file\t read events from \"file\"\n" "\t -S spec\t allocate a system-wide sampling PMC\n" "\t -W\t\t (toggle) show counts per context switch\n" "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n" "\t -d\t\t (toggle) track descendants\n" "\t -g\t\t produce gprof(1) compatible profiles\n" "\t -k dir\t\t set the path to the kernel\n" "\t 
-n rate\t set sampling rate\n" "\t -o file\t send print output to \"file\"\n" "\t -p spec\t allocate a process-private counting PMC\n" "\t -q\t\t suppress verbosity\n" "\t -r fsroot\t specify FS root directory\n" "\t -s spec\t allocate a system-wide counting PMC\n" "\t -t process-spec attach to running processes matching " "\"process-spec\"\n" "\t -v\t\t increase verbosity\n" "\t -w secs\t set printing time interval\n" "\t -z depth\t limit callchain display depth" ); } /* * Main */ int main(int argc, char **argv) { double interval; int option, npmc, ncpu, haltedcpus; int c, check_driver_stats, current_cpu, current_sampling_count; int do_callchain, do_descendants, do_logproccsw, do_logprocexit; int do_print; size_t dummy; int graphdepth; int pipefd[2]; int use_cumulative_counts; uint32_t cpumask; char *end, *tmp; const char *errmsg, *graphfilename; enum pmcstat_state runstate; struct pmc_driverstats ds_start, ds_end; struct pmcstat_ev *ev; struct sigaction sa; struct kevent kev; struct winsize ws; struct stat sb; char buffer[PATH_MAX]; check_driver_stats = 0; current_cpu = 0; current_sampling_count = DEFAULT_SAMPLE_COUNT; do_callchain = 1; do_descendants = 0; do_logproccsw = 0; do_logprocexit = 0; use_cumulative_counts = 0; graphfilename = "-"; args.pa_required = 0; args.pa_flags = 0; args.pa_verbosity = 1; args.pa_logfd = -1; args.pa_fsroot = ""; args.pa_kernel = strdup("/boot/kernel"); args.pa_samplesdir = "."; args.pa_printfile = stderr; args.pa_graphdepth = DEFAULT_CALLGRAPH_DEPTH; args.pa_graphfile = NULL; args.pa_interval = DEFAULT_WAIT_INTERVAL; args.pa_mapfilename = NULL; args.pa_inputpath = NULL; args.pa_outputpath = NULL; STAILQ_INIT(&args.pa_events); SLIST_INIT(&args.pa_targets); bzero(&ds_start, sizeof(ds_start)); bzero(&ds_end, sizeof(ds_end)); ev = NULL; /* * The initial CPU mask specifies all non-halted CPUS in the * system. 
*/ dummy = sizeof(int); if (sysctlbyname("hw.ncpu", &ncpu, &dummy, NULL, 0) < 0) err(EX_OSERR, "ERROR: Cannot determine the number of CPUs"); cpumask = (1 << ncpu) - 1; haltedcpus = 0; if (ncpu > 1) { if (sysctlbyname("machdep.hlt_cpus", &haltedcpus, &dummy, NULL, 0) < 0) err(EX_OSERR, "ERROR: Cannot determine which CPUs are " "halted"); cpumask &= ~haltedcpus; } while ((option = getopt(argc, argv, - "CD:EG:M:NO:P:R:S:Wc:dgk:m:n:o:p:qr:s:t:vw:z:")) != -1) + "CD:EG:M:NO:P:R:S:Wc:dgk:m:n:o:p:qr:s:t:vw:z:f:")) != -1) switch (option) { case 'C': /* cumulative values */ use_cumulative_counts = !use_cumulative_counts; args.pa_required |= FLAG_HAS_COUNTING_PMCS; break; case 'c': /* CPU */ if (optarg[0] == '*' && optarg[1] == '\0') cpumask = ((1 << ncpu) - 1) & ~haltedcpus; else cpumask = pmcstat_get_cpumask(optarg); args.pa_required |= FLAG_HAS_SYSTEM_PMCS; break; case 'D': if (stat(optarg, &sb) < 0) err(EX_OSERR, "ERROR: Cannot stat \"%s\"", optarg); if (!S_ISDIR(sb.st_mode)) errx(EX_USAGE, "ERROR: \"%s\" is not a " "directory.", optarg); args.pa_samplesdir = optarg; args.pa_flags |= FLAG_HAS_SAMPLESDIR; args.pa_required |= FLAG_DO_GPROF; break; case 'd': /* toggle descendents */ do_descendants = !do_descendants; args.pa_required |= FLAG_HAS_PROCESS_PMCS; break; case 'G': /* produce a system-wide callgraph */ args.pa_flags |= FLAG_DO_CALLGRAPHS; graphfilename = optarg; break; + case 'f': /* output system-wide callgraph in calltree format (KCachegrind) */ + if (strcasecmp(optarg, "calltree") == 0) + args.pa_flags |= FLAG_DO_CALLTREE; + break; + case 'g': /* produce gprof compatible profiles */ args.pa_flags |= FLAG_DO_GPROF; break; case 'k': /* pathname to the kernel */ free(args.pa_kernel); args.pa_kernel = strdup(optarg); args.pa_required |= FLAG_DO_ANALYSIS; args.pa_flags |= FLAG_HAS_KERNELPATH; break; case 'm': args.pa_flags |= FLAG_WANTS_MAPPINGS; graphfilename = optarg; break; case 'E': /* log process exit */ do_logprocexit = !do_logprocexit; args.pa_required |= 
(FLAG_HAS_PROCESS_PMCS | FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE); break; case 'M': /* mapfile */ args.pa_mapfilename = optarg; break; case 'N': do_callchain = !do_callchain; args.pa_required |= FLAG_HAS_SAMPLING_PMCS; break; case 'p': /* process virtual counting PMC */ case 's': /* system-wide counting PMC */ case 'P': /* process virtual sampling PMC */ case 'S': /* system-wide sampling PMC */ if ((ev = malloc(sizeof(*ev))) == NULL) errx(EX_SOFTWARE, "ERROR: Out of memory."); switch (option) { case 'p': ev->ev_mode = PMC_MODE_TC; break; case 's': ev->ev_mode = PMC_MODE_SC; break; case 'P': ev->ev_mode = PMC_MODE_TS; break; case 'S': ev->ev_mode = PMC_MODE_SS; break; } if (option == 'P' || option == 'p') { args.pa_flags |= FLAG_HAS_PROCESS_PMCS; args.pa_required |= (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET); } if (option == 'P' || option == 'S') { args.pa_flags |= FLAG_HAS_SAMPLING_PMCS; args.pa_required |= (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE); } if (option == 'p' || option == 's') args.pa_flags |= FLAG_HAS_COUNTING_PMCS; if (option == 's' || option == 'S') args.pa_flags |= FLAG_HAS_SYSTEM_PMCS; ev->ev_spec = strdup(optarg); if (option == 'S' || option == 'P') ev->ev_count = current_sampling_count; else ev->ev_count = -1; if (option == 'S' || option == 's') ev->ev_cpu = ffs(cpumask) - 1; else ev->ev_cpu = PMC_CPU_ANY; ev->ev_flags = 0; if (do_callchain) ev->ev_flags |= PMC_F_CALLCHAIN; if (do_descendants) ev->ev_flags |= PMC_F_DESCENDANTS; if (do_logprocexit) ev->ev_flags |= PMC_F_LOG_PROCEXIT; if (do_logproccsw) ev->ev_flags |= PMC_F_LOG_PROCCSW; ev->ev_cumulative = use_cumulative_counts; ev->ev_saved = 0LL; ev->ev_pmcid = PMC_ID_INVALID; /* extract event name */ c = strcspn(optarg, ", \t"); ev->ev_name = malloc(c + 1); (void) strncpy(ev->ev_name, optarg, c); *(ev->ev_name + c) = '\0'; STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next); if (option == 's' || option == 'S') pmcstat_clone_event_descriptor(&args, ev, cpumask & ~(1 << ev->ev_cpu)); break; 
case 'n': /* sampling count */ current_sampling_count = strtol(optarg, &end, 0); if (*end != '\0' || current_sampling_count <= 0) errx(EX_USAGE, "ERROR: Illegal count value \"%s\".", optarg); args.pa_required |= FLAG_HAS_SAMPLING_PMCS; break; case 'o': /* outputfile */ if (args.pa_printfile != NULL) (void) fclose(args.pa_printfile); if ((args.pa_printfile = fopen(optarg, "w")) == NULL) errx(EX_OSERR, "ERROR: cannot open \"%s\" for " "writing.", optarg); args.pa_flags |= FLAG_DO_PRINT; break; case 'O': /* sampling output */ if (args.pa_outputpath) errx(EX_USAGE, "ERROR: option -O may only be " "specified once."); args.pa_outputpath = optarg; args.pa_flags |= FLAG_HAS_OUTPUT_LOGFILE; break; case 'q': /* quiet mode */ args.pa_verbosity = 0; break; case 'r': /* root FS path */ args.pa_fsroot = optarg; break; case 'R': /* read an existing log file */ if (args.pa_inputpath != NULL) errx(EX_USAGE, "ERROR: option -R may only be " "specified once."); args.pa_inputpath = optarg; if (args.pa_printfile == stderr) args.pa_printfile = stdout; args.pa_flags |= FLAG_READ_LOGFILE; break; case 't': /* target pid or process name */ pmcstat_find_targets(&args, optarg); args.pa_flags |= FLAG_HAS_TARGET; args.pa_required |= FLAG_HAS_PROCESS_PMCS; break; case 'v': /* verbose */ args.pa_verbosity++; break; case 'w': /* wait interval */ interval = strtod(optarg, &end); if (*end != '\0' || interval <= 0) errx(EX_USAGE, "ERROR: Illegal wait interval " "value \"%s\".", optarg); args.pa_flags |= FLAG_HAS_WAIT_INTERVAL; args.pa_required |= FLAG_HAS_COUNTING_PMCS; args.pa_interval = interval; break; case 'W': /* toggle LOG_CSW */ do_logproccsw = !do_logproccsw; args.pa_required |= (FLAG_HAS_PROCESS_PMCS | FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE); break; case 'z': graphdepth = strtod(optarg, &end); if (*end != '\0' || graphdepth <= 0) errx(EX_USAGE, "ERROR: Illegal callchain " "depth \"%s\".", optarg); args.pa_graphdepth = graphdepth; args.pa_required |= FLAG_DO_CALLGRAPHS; break; case 
'?': default: pmcstat_show_usage(); break; } args.pa_argc = (argc -= optind); args.pa_argv = (argv += optind); args.pa_cpumask = cpumask; /* For selecting CPUs using -R. */ if (argc) /* command line present */ args.pa_flags |= FLAG_HAS_COMMANDLINE; if (args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS | FLAG_WANTS_MAPPINGS)) args.pa_flags |= FLAG_DO_ANALYSIS; /* * Check invocation syntax. */ /* disallow -O and -R together */ if (args.pa_outputpath && args.pa_inputpath) errx(EX_USAGE, "ERROR: options -O and -R are mutually " "exclusive."); /* -m option is allowed with -R only. */ if (args.pa_flags & FLAG_WANTS_MAPPINGS && args.pa_inputpath == NULL) errx(EX_USAGE, "ERROR: option -m requires an input file"); /* -m option is not allowed combined with -g or -G. */ if (args.pa_flags & FLAG_WANTS_MAPPINGS && args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS)) errx(EX_USAGE, "ERROR: option -m and -g | -G are mutually " "exclusive"); if (args.pa_flags & FLAG_READ_LOGFILE) { errmsg = NULL; if (args.pa_flags & FLAG_HAS_COMMANDLINE) errmsg = "a command line specification"; else if (args.pa_flags & FLAG_HAS_TARGET) errmsg = "option -t"; else if (!STAILQ_EMPTY(&args.pa_events)) errmsg = "a PMC event specification"; if (errmsg) errx(EX_USAGE, "ERROR: option -R may not be used with " "%s.", errmsg); } else if (STAILQ_EMPTY(&args.pa_events)) /* All other uses require a PMC spec. 
*/ pmcstat_show_usage(); /* check for -t pid without a process PMC spec */ if ((args.pa_required & FLAG_HAS_TARGET) && (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0) errx(EX_USAGE, "ERROR: option -t requires a process mode PMC " "to be specified."); /* check for process-mode options without a command or -t pid */ if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) && (args.pa_flags & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) == 0) errx(EX_USAGE, "ERROR: options -d, -E, -p, -P, and -W require " "a command line or target process."); /* check for -p | -P without a target process of some sort */ if ((args.pa_required & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) && (args.pa_flags & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) == 0) errx(EX_USAGE, "ERROR: options -P and -p require a " "target process or a command line."); /* check for process-mode options without a process-mode PMC */ if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) && (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0) errx(EX_USAGE, "ERROR: options -d, -E, and -W require a " "process mode PMC to be specified."); /* check for -c cpu with no system mode PMCs or logfile. 
*/ if ((args.pa_required & FLAG_HAS_SYSTEM_PMCS) && (args.pa_flags & FLAG_HAS_SYSTEM_PMCS) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -c requires at least one " "system mode PMC to be specified."); /* check for counting mode options without a counting PMC */ if ((args.pa_required & FLAG_HAS_COUNTING_PMCS) && (args.pa_flags & FLAG_HAS_COUNTING_PMCS) == 0) errx(EX_USAGE, "ERROR: options -C, -W, -o and -w require at " "least one counting mode PMC to be specified."); /* check for sampling mode options without a sampling PMC spec */ if ((args.pa_required & FLAG_HAS_SAMPLING_PMCS) && (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) == 0) errx(EX_USAGE, "ERROR: options -N, -n and -O require at " "least one sampling mode PMC to be specified."); /* check if -g/-G are being used correctly */ if ((args.pa_flags & FLAG_DO_ANALYSIS) && !(args.pa_flags & (FLAG_HAS_SAMPLING_PMCS|FLAG_READ_LOGFILE))) errx(EX_USAGE, "ERROR: options -g/-G require sampling PMCs " "or -R to be specified."); /* check if -O was spuriously specified */ if ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) && (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -O is used only with options " "-E, -P, -S and -W."); /* -k kernel path require -g/-G or -R */ if ((args.pa_flags & FLAG_HAS_KERNELPATH) && (args.pa_flags & FLAG_DO_ANALYSIS) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -k is only used with -g/-R."); /* -D only applies to gprof output mode (-g) */ if ((args.pa_flags & FLAG_HAS_SAMPLESDIR) && (args.pa_flags & FLAG_DO_GPROF) == 0) errx(EX_USAGE, "ERROR: option -D is only used with -g."); /* -M mapfile requires -g or -R */ if (args.pa_mapfilename != NULL && (args.pa_flags & FLAG_DO_GPROF) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -M is only used with -g/-R."); /* * Disallow textual output of sampling PMCs if counting PMCs * have also been asked for, mostly because the combined 
output * is difficult to make sense of. */ if ((args.pa_flags & FLAG_HAS_COUNTING_PMCS) && (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) && ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) == 0)) errx(EX_USAGE, "ERROR: option -O is required if counting and " "sampling PMCs are specified together."); /* * Check if "-k kerneldir" was specified, and if whether * 'kerneldir' actually refers to a a file. If so, use * `dirname path` to determine the kernel directory. */ if (args.pa_flags & FLAG_HAS_KERNELPATH) { (void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot, args.pa_kernel); if (stat(buffer, &sb) < 0) err(EX_OSERR, "ERROR: Cannot locate kernel \"%s\"", buffer); if (!S_ISREG(sb.st_mode) && !S_ISDIR(sb.st_mode)) errx(EX_USAGE, "ERROR: \"%s\": Unsupported file type.", buffer); if (!S_ISDIR(sb.st_mode)) { tmp = args.pa_kernel; args.pa_kernel = strdup(dirname(args.pa_kernel)); free(tmp); (void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot, args.pa_kernel); if (stat(buffer, &sb) < 0) err(EX_OSERR, "ERROR: Cannot stat \"%s\"", buffer); if (!S_ISDIR(sb.st_mode)) errx(EX_USAGE, "ERROR: \"%s\" is not a " "directory.", buffer); } } /* * If we have a callgraph be created, select the outputfile. */ if (args.pa_flags & FLAG_DO_CALLGRAPHS) { if (strcmp(graphfilename, "-") == 0) args.pa_graphfile = args.pa_printfile; else { args.pa_graphfile = fopen(graphfilename, "w"); if (args.pa_graphfile == NULL) err(EX_OSERR, "ERROR: cannot open \"%s\" " "for writing", graphfilename); } } if (args.pa_flags & FLAG_WANTS_MAPPINGS) { args.pa_graphfile = fopen(graphfilename, "w"); if (args.pa_graphfile == NULL) err(EX_OSERR, "ERROR: cannot open \"%s\" for writing", graphfilename); } /* if we've been asked to process a log file, do that and exit */ if (args.pa_flags & FLAG_READ_LOGFILE) { /* * Print the log in textual form if we haven't been * asked to generate profiling information. 
*/ if ((args.pa_flags & FLAG_DO_ANALYSIS) == 0) args.pa_flags |= FLAG_DO_PRINT; + if (args.pa_flags & FLAG_DO_CALLTREE) { + pmcstat_ct_initialize_logging(&args); + args.pa_logfd = pmcstat_ct_open_log(args.pa_inputpath, + PMCSTAT_OPEN_FOR_READ); + if ((args.pa_logparser = pmclog_open(args.pa_logfd)) == NULL) + err(EX_OSERR, "ERROR: Cannot create parser"); + pmcstat_ct_process_log(&args); + pmcstat_ct_shutdown_logging(&args); + exit(EX_OK); + } + pmcstat_initialize_logging(&args); args.pa_logfd = pmcstat_open_log(args.pa_inputpath, PMCSTAT_OPEN_FOR_READ); if ((args.pa_logparser = pmclog_open(args.pa_logfd)) == NULL) err(EX_OSERR, "ERROR: Cannot create parser"); pmcstat_process_log(&args); pmcstat_shutdown_logging(&args); exit(EX_OK); } /* otherwise, we've been asked to collect data */ if (pmc_init() < 0) err(EX_UNAVAILABLE, "ERROR: Initialization of the pmc(3) library failed"); if ((npmc = pmc_npmc(0)) < 0) /* assume all CPUs are identical */ err(EX_OSERR, "ERROR: Cannot determine the number of PMCs " "on CPU %d", 0); /* Allocate a kqueue */ if ((pmcstat_kq = kqueue()) < 0) err(EX_OSERR, "ERROR: Cannot allocate kqueue"); /* * Configure the specified log file or setup a default log * consumer via a pipe. */ if (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) { if (args.pa_outputpath) args.pa_logfd = pmcstat_open_log(args.pa_outputpath, PMCSTAT_OPEN_FOR_WRITE); else { /* * process the log on the fly by reading it in * through a pipe. 
*/ if (pipe(pipefd) < 0) err(EX_OSERR, "ERROR: pipe(2) failed"); if (fcntl(pipefd[READPIPEFD], F_SETFL, O_NONBLOCK) < 0) err(EX_OSERR, "ERROR: fcntl(2) failed"); EV_SET(&kev, pipefd[READPIPEFD], EVFILT_READ, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent"); args.pa_logfd = pipefd[WRITEPIPEFD]; args.pa_flags |= (FLAG_HAS_PIPE | FLAG_DO_PRINT); args.pa_logparser = pmclog_open(pipefd[READPIPEFD]); } if (pmc_configure_logfile(args.pa_logfd) < 0) err(EX_OSERR, "ERROR: Cannot configure log file"); } /* remember to check for driver errors if we are sampling or logging */ check_driver_stats = (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) || (args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE); /* * Allocate PMCs. */ STAILQ_FOREACH(ev, &args.pa_events, ev_next) { if (pmc_allocate(ev->ev_spec, ev->ev_mode, ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0) err(EX_OSERR, "ERROR: Cannot allocate %s-mode pmc with " "specification \"%s\"", PMC_IS_SYSTEM_MODE(ev->ev_mode) ? "system" : "process", ev->ev_spec); if (PMC_IS_SAMPLING_MODE(ev->ev_mode) && pmc_set(ev->ev_pmcid, ev->ev_count) < 0) err(EX_OSERR, "ERROR: Cannot set sampling count " "for PMC \"%s\"", ev->ev_name); } /* compute printout widths */ STAILQ_FOREACH(ev, &args.pa_events, ev_next) { int counter_width; int display_width; int header_width; (void) pmc_width(ev->ev_pmcid, &counter_width); header_width = strlen(ev->ev_name) + 2; /* prefix '%c/' */ display_width = (int) floor(counter_width / 3.32193) + 1; if (PMC_IS_SYSTEM_MODE(ev->ev_mode)) header_width += 3; /* 2 digit CPU number + '/' */ if (header_width > display_width) { ev->ev_fieldskip = 0; ev->ev_fieldwidth = header_width; } else { ev->ev_fieldskip = display_width - header_width; ev->ev_fieldwidth = display_width; } } /* * If our output is being set to a terminal, register a handler * for window size changes. 
*/ if (isatty(fileno(args.pa_printfile))) { if (ioctl(fileno(args.pa_printfile), TIOCGWINSZ, &ws) < 0) err(EX_OSERR, "ERROR: Cannot determine window size"); pmcstat_displayheight = ws.ws_row - 1; EV_SET(&kev, SIGWINCH, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for " "SIGWINCH"); } EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT"); EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO"); /* * An exec() failure of a forked child is signalled by the * child sending the parent a SIGCHLD. We don't register an * actual signal handler for SIGCHLD, but instead use our * kqueue to pick up the signal. */ EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD"); /* setup a timer if we have counting mode PMCs needing to be printed */ if ((args.pa_flags & FLAG_HAS_COUNTING_PMCS) && (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) { EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, args.pa_interval * 1000, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for " "timer"); } /* attach PMCs to the target process, starting it if specified */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_create_process(&args); if (check_driver_stats && pmc_get_driver_stats(&ds_start) < 0) err(EX_OSERR, "ERROR: Cannot retrieve driver statistics"); /* Attach process pmcs to the target process. 
*/ if (args.pa_flags & (FLAG_HAS_TARGET | FLAG_HAS_COMMANDLINE)) { if (SLIST_EMPTY(&args.pa_targets)) errx(EX_DATAERR, "ERROR: No matching target " "processes."); if (args.pa_flags & FLAG_HAS_PROCESS_PMCS) pmcstat_attach_pmcs(&args); if (pmcstat_kvm) { kvm_close(pmcstat_kvm); pmcstat_kvm = NULL; } } /* start the pmcs */ pmcstat_start_pmcs(&args); /* start the (commandline) process if needed */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_start_process(); /* initialize logging if printing the configured log */ if ((args.pa_flags & FLAG_DO_PRINT) && (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE))) pmcstat_initialize_logging(&args); /* Handle SIGINT using the kqueue loop */ sa.sa_handler = SIG_IGN; sa.sa_flags = 0; (void) sigemptyset(&sa.sa_mask); if (sigaction(SIGINT, &sa, NULL) < 0) err(EX_OSERR, "ERROR: Cannot install signal handler"); /* * loop till either the target process (if any) exits, or we * are killed by a SIGINT. */ runstate = PMCSTAT_RUNNING; do_print = 0; do { if ((c = kevent(pmcstat_kq, NULL, 0, &kev, 1, NULL)) <= 0) { if (errno != EINTR) err(EX_OSERR, "ERROR: kevent failed"); else continue; } if (kev.flags & EV_ERROR) errc(EX_OSERR, kev.data, "ERROR: kevent failed"); switch (kev.filter) { case EVFILT_PROC: /* target has exited */ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) runstate = pmcstat_close_log(&args); else runstate = PMCSTAT_FINISHED; do_print = 1; break; case EVFILT_READ: /* log file data is present */ runstate = pmcstat_process_log(&args); break; case EVFILT_SIGNAL: if (kev.ident == SIGCHLD) { /* * The child process sends us a * SIGCHLD if its exec() failed. We * wait for it to exit and then exit * ourselves. */ (void) wait(&c); runstate = PMCSTAT_FINISHED; } else if (kev.ident == SIGIO) { /* * We get a SIGIO if a PMC loses all * of its targets, or if logfile * writes encounter an error. 
*/ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) { runstate = pmcstat_close_log(&args); if (args.pa_flags & (FLAG_DO_PRINT|FLAG_DO_ANALYSIS)) pmcstat_process_log(&args); } do_print = 1; /* print PMCs at exit */ runstate = PMCSTAT_FINISHED; } else if (kev.ident == SIGINT) { /* Kill the child process if we started it */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_kill_process(&args); /* Close the pipe to self, if present. */ if (args.pa_flags & FLAG_HAS_PIPE) (void) close(pipefd[READPIPEFD]); runstate = PMCSTAT_FINISHED; } else if (kev.ident == SIGWINCH) { if (ioctl(fileno(args.pa_printfile), TIOCGWINSZ, &ws) < 0) err(EX_OSERR, "ERROR: Cannot determine " "window size"); pmcstat_displayheight = ws.ws_row - 1; } else assert(0); break; case EVFILT_TIMER: /* print out counting PMCs */ do_print = 1; break; } if (do_print && (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) { pmcstat_print_pmcs(&args); if (runstate == PMCSTAT_FINISHED && /* final newline */ (args.pa_flags & FLAG_DO_PRINT) == 0) (void) fprintf(args.pa_printfile, "\n"); do_print = 0; } } while (runstate != PMCSTAT_FINISHED); /* flush any pending log entries */ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) pmc_flush_logfile(); pmcstat_cleanup(&args); free(args.pa_kernel); /* check if the driver lost any samples or events */ if (check_driver_stats) { if (pmc_get_driver_stats(&ds_end) < 0) err(EX_OSERR, "ERROR: Cannot retrieve driver " "statistics"); if (ds_start.pm_intr_bufferfull != ds_end.pm_intr_bufferfull && args.pa_verbosity > 0) warnx("WARNING: some samples were dropped. Please " "consider tuning the \"kern.hwpmc.nsamples\" " "tunable."); if (ds_start.pm_buffer_requests_failed != ds_end.pm_buffer_requests_failed && args.pa_verbosity > 0) warnx("WARNING: some events were discarded. 
Please " "consider tuning the \"kern.hwpmc.nbuffers\" " "tunable."); } exit(EX_OK); } Index: user/fabient/svctest/usr.sbin/pmcstat/pmcstat.h =================================================================== --- user/fabient/svctest/usr.sbin/pmcstat/pmcstat.h (revision 197318) +++ user/fabient/svctest/usr.sbin/pmcstat/pmcstat.h (revision 197319) @@ -1,154 +1,160 @@ /*- * Copyright (c) 2005-2007, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _PMCSTAT_H_ #define _PMCSTAT_H_ #define FLAG_HAS_TARGET 0x00000001 /* process target */ #define FLAG_HAS_WAIT_INTERVAL 0x00000002 /* -w secs */ #define FLAG_HAS_OUTPUT_LOGFILE 0x00000004 /* -O file or pipe */ #define FLAG_HAS_COMMANDLINE 0x00000008 /* command */ #define FLAG_HAS_SAMPLING_PMCS 0x00000010 /* -S or -P */ #define FLAG_HAS_COUNTING_PMCS 0x00000020 /* -s or -p */ #define FLAG_HAS_PROCESS_PMCS 0x00000040 /* -P or -p */ #define FLAG_HAS_SYSTEM_PMCS 0x00000080 /* -S or -s */ #define FLAG_HAS_PIPE 0x00000100 /* implicit log */ #define FLAG_READ_LOGFILE 0x00000200 /* -R file */ #define FLAG_DO_GPROF 0x00000400 /* -g */ #define FLAG_HAS_SAMPLESDIR 0x00000800 /* -D dir */ #define FLAG_HAS_KERNELPATH 0x00001000 /* -k kernel */ #define FLAG_DO_PRINT 0x00002000 /* -o */ #define FLAG_DO_CALLGRAPHS 0x00004000 /* -G */ #define FLAG_DO_ANALYSIS 0x00008000 /* -g or -G */ #define FLAG_WANTS_MAPPINGS 0x00010000 /* -m */ +#define FLAG_DO_CALLTREE 0x00020000 /* -f calltree */ #define DEFAULT_SAMPLE_COUNT 65536 #define DEFAULT_WAIT_INTERVAL 5.0 #define DEFAULT_DISPLAY_HEIGHT 23 #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_CALLGRAPH_DEPTH 4 #define PRINT_HEADER_PREFIX "# " #define READPIPEFD 0 #define WRITEPIPEFD 1 #define NPIPEFD 2 #define NSOCKPAIRFD 2 #define PARENTSOCKET 0 #define CHILDSOCKET 1 #define PMCSTAT_OPEN_FOR_READ 0 #define PMCSTAT_OPEN_FOR_WRITE 1 #define PMCSTAT_DEFAULT_NW_HOST "localhost" #define PMCSTAT_DEFAULT_NW_PORT "9000" #define PMCSTAT_NHASH 256 #define PMCSTAT_HASH_MASK 0xFF #define PMCSTAT_LDD_COMMAND "/usr/bin/ldd" #define PMCSTAT_PRINT_ENTRY(A,T,...) 
do { \ (void) fprintf((A)->pa_printfile, "%-9s", T); \ (void) fprintf((A)->pa_printfile, " " __VA_ARGS__); \ (void) fprintf((A)->pa_printfile, "\n"); \ } while (0) enum pmcstat_state { PMCSTAT_FINISHED = 0, PMCSTAT_EXITING = 1, PMCSTAT_RUNNING = 2 }; struct pmcstat_ev { STAILQ_ENTRY(pmcstat_ev) ev_next; int ev_count; /* associated count if in sampling mode */ uint32_t ev_cpu; /* cpus for this event */ int ev_cumulative; /* show cumulative counts */ int ev_flags; /* PMC_F_* */ int ev_fieldskip; /* #leading spaces */ int ev_fieldwidth; /* print width */ enum pmc_mode ev_mode; /* desired mode */ char *ev_name; /* (derived) event name */ pmc_id_t ev_pmcid; /* allocated ID */ pmc_value_t ev_saved; /* for incremental counts */ char *ev_spec; /* event specification */ }; struct pmcstat_target { SLIST_ENTRY(pmcstat_target) pt_next; pid_t pt_pid; }; struct pmcstat_args { int pa_flags; /* argument flags */ int pa_required; /* required features */ int pa_verbosity; /* verbosity level */ FILE *pa_printfile; /* where to send printed output */ int pa_logfd; /* output log file */ char *pa_inputpath; /* path to input log */ char *pa_outputpath; /* path to output log */ void *pa_logparser; /* log file parser */ const char *pa_fsroot; /* FS root where executables reside */ char *pa_kernel; /* pathname of the kernel */ const char *pa_samplesdir; /* directory for profile files */ const char *pa_mapfilename;/* mapfile name */ FILE *pa_graphfile; /* where to send the callgraph */ int pa_graphdepth; /* print depth for callgraphs */ double pa_interval; /* printing interval in seconds */ uint32_t pa_cpumask; /* filter for CPUs analysed */ int pa_argc; char **pa_argv; STAILQ_HEAD(, pmcstat_ev) pa_events; SLIST_HEAD(, pmcstat_target) pa_targets; } args; /* Function prototypes */ void pmcstat_attach_pmcs(struct pmcstat_args *_a); void pmcstat_cleanup(struct pmcstat_args *_a); void pmcstat_clone_event_descriptor(struct pmcstat_args *_a, struct pmcstat_ev *_ev, uint32_t _cpumask); int 
pmcstat_close_log(struct pmcstat_args *_a); +int pmcstat_ct_close_log(struct pmcstat_args *_a); void pmcstat_create_process(struct pmcstat_args *_a); void pmcstat_find_targets(struct pmcstat_args *_a, const char *_arg); void pmcstat_initialize_logging(struct pmcstat_args *_a); +void pmcstat_ct_initialize_logging(struct pmcstat_args *_a); void pmcstat_kill_process(struct pmcstat_args *_a); int pmcstat_open_log(const char *_p, int _mode); +int pmcstat_ct_open_log(const char *_p, int _mode); void pmcstat_print_counters(struct pmcstat_args *_a); void pmcstat_print_headers(struct pmcstat_args *_a); void pmcstat_print_pmcs(struct pmcstat_args *_a); void pmcstat_show_usage(void); void pmcstat_shutdown_logging(struct pmcstat_args *_a); +void pmcstat_ct_shutdown_logging(struct pmcstat_args *_a); void pmcstat_start_pmcs(struct pmcstat_args *_a); void pmcstat_start_process(void); int pmcstat_process_log(struct pmcstat_args *_a); +int pmcstat_ct_process_log(struct pmcstat_args *_a); uint32_t pmcstat_get_cpumask(const char *_a); #endif /* _PMCSTAT_H_ */ Index: user/fabient/svctest/usr.sbin/pmcstat/pmcstat_logct.c =================================================================== --- user/fabient/svctest/usr.sbin/pmcstat/pmcstat_logct.c (nonexistent) +++ user/fabient/svctest/usr.sbin/pmcstat/pmcstat_logct.c (revision 197319) @@ -0,0 +1,2022 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Transform a hwpmc(4) log into human readable form, and into + * gprof(1) compatible profiles. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmcstat.h" + +#define min(A,B) ((A) < (B) ? (A) : (B)) +#define max(A,B) ((A) > (B) ? 
(A) : (B)) + +#define PMCSTAT_ALLOCATE 1 + +#define PMCSTAT_PC_RESERVED 0 + +/* + * PUBLIC INTERFACES + * + * pmcstat_ct_initialize_logging() initialize this module, called first + * pmcstat_ct_shutdown_logging() orderly shutdown, called last + * pmcstat_ct_open_log() open an eventlog for processing + * pmcstat_ct_process_log() print/convert an event log + * pmcstat_ct_close_log() finish processing an event log + * + * IMPLEMENTATION NOTES + * + * We correlate each 'callchain' or 'sample' entry seen in the event + * log back to an executable object in the system. Executable objects + * include: + * - program executables, + * - shared libraries loaded by the runtime loader, + * - dlopen()'ed objects loaded by the program, + * - the runtime loader itself, + * - the kernel and kernel modules. + * + * Each process that we know about is treated as a set of regions that + * map to executable objects. Processes are described by + * 'pmcstat_process' structures. Executable objects are tracked by + * 'pmcstat_image' structures. The kernel and kernel modules are + * common to all processes (they reside at the same virtual addresses + * for all processes). Individual processes can have their text + * segments and shared libraries loaded at process-specific locations. + * + * A given executable object can be in use by multiple processes + * (e.g., libc.so) and loaded at a different address in each. + * pmcstat_pcmap structures track per-image mappings. + * + * The sample log could have samples from multiple PMCs; we + * generate one 'gmon.out' profile per PMC. + * + */ + +typedef const void *pmcstat_interned_string; + +/* + * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable + * names. 
+ */ + +struct pmcstat_pmcrecord { + LIST_ENTRY(pmcstat_pmcrecord) pr_next; + pmc_id_t pr_pmcid; + pmcstat_interned_string pr_pmcname; + unsigned int pr_index; +}; + +static unsigned int pmcstat_npmcs = 0; +static LIST_HEAD(,pmcstat_pmcrecord) pmcstat_pmcs = LIST_HEAD_INITIALIZER(&pmcstat_pmcs); +static int pmcstat_mergepmc = 1; + +/* + * A 'pmcstat_image' structure describes an executable program on + * disk. 'pi_execpath' is a cookie representing the pathname of + * the executable. 'pi_start' and 'pi_end' are the least and greatest + * virtual addresses for the text segments in the executable. + * 'pi_gmonlist' contains a linked list of gmon.out files associated + * with this image. + */ + +enum pmcstat_image_type { + PMCSTAT_IMAGE_UNKNOWN = 0, /* never looked at the image */ + PMCSTAT_IMAGE_INDETERMINABLE, /* can't tell what the image is */ + PMCSTAT_IMAGE_ELF32, /* ELF 32 bit object */ + PMCSTAT_IMAGE_ELF64, /* ELF 64 bit object */ + PMCSTAT_IMAGE_AOUT /* AOUT object */ +}; + +struct pmcstat_image { + LIST_ENTRY(pmcstat_image) pi_next; /* hash link */ + TAILQ_ENTRY(pmcstat_image) pi_lru; /* LRU list */ + pmcstat_interned_string pi_execpath; /* cookie */ + pmcstat_interned_string pi_samplename; /* sample path name */ + pmcstat_interned_string pi_fullpath; /* path to FS object */ + pmcstat_interned_string pi_name; /* file name */ + enum pmcstat_image_type pi_type; /* executable type */ + + /* + * Executables have pi_start and pi_end; these are zero + * for shared libraries. + */ + uintfptr_t pi_start; /* start address (inclusive) */ + uintfptr_t pi_end; /* end address (exclusive) */ + uintfptr_t pi_entry; /* entry address */ + uintfptr_t pi_vaddr; /* virtual address where loaded */ + int pi_isdynamic; /* whether a dynamic object */ + int pi_iskernelmodule; + pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */ + + /* All symbols associated with this object. 
*/ + struct pmcstat_symbol *pi_symbols; + size_t pi_symcount; + +}; + +/* + * All image descriptors are kept in a hash table. + */ +static LIST_HEAD(,pmcstat_image) pmcstat_image_hash[PMCSTAT_NHASH]; + +/* + * A 'pmcstat_pcmap' structure maps a virtual address range to an + * underlying 'pmcstat_image' descriptor. + */ +struct pmcstat_pcmap { + TAILQ_ENTRY(pmcstat_pcmap) ppm_next; + uintfptr_t ppm_lowpc; + uintfptr_t ppm_highpc; + struct pmcstat_image *ppm_image; +}; + +/* + * A 'pmcstat_process' structure models processes. Each process is + * associated with a set of pmcstat_pcmap structures that map + * addresses inside it to executable objects. This set is implemented + * as a list, kept sorted in ascending order of mapped addresses. + * + * 'pp_pid' holds the pid of the process. When a process exits, the + * 'pp_isactive' field is set to zero, but the process structure is + * not immediately reclaimed because there may still be samples in the + * log for this process. + */ + +struct pmcstat_process { + LIST_ENTRY(pmcstat_process) pp_next; /* hash-next */ + pid_t pp_pid; /* associated pid */ + int pp_isactive; /* whether active */ + uintfptr_t pp_entryaddr; /* entry address */ + TAILQ_HEAD(,pmcstat_pcmap) pp_map; /* address range map */ +}; + +/* + * All process descriptors are kept in a hash table. + */ +static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH]; + +static struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */ + +/* + * Each function symbol tracked by pmcstat(8). + */ + +struct pmcstat_symbol { + pmcstat_interned_string ps_name; + uint64_t ps_start; + uint64_t ps_end; +}; + +static pmcstat_interned_string pmcstat_previous_filename_printed; + +struct pmcstat_ctnode; + +struct pmcstat_ctarc { + uint32_t pcta_child_count_c; + uint32_t *pcta_child_count; + struct pmcstat_ctnode *pcta_child; +}; + +/* + * Each call tree node is tracked by a pmcstat_ctnode struct. 
+ */ +struct pmcstat_ctnode { + struct pmcstat_image *pct_image; + uintfptr_t pct_func; + uint32_t pct_self_count_c; + uint32_t *pct_self_count; + + uint32_t pct_narc; + uint32_t pct_arc_c; + struct pmcstat_ctarc *pct_arc; +}; + +struct pmcstat_ctnode_hash { + struct pmcstat_ctnode *pch_ctnode; + LIST_ENTRY(pmcstat_ctnode_hash) pch_next; +}; + +/* + * All nodes indexed by function/image name are placed in a hash table. + */ +static LIST_HEAD(,pmcstat_ctnode_hash) pmcstat_ctnode_hash[PMCSTAT_NHASH]; + +/* Misc. statistics */ +static struct pmcstat_stats { + int ps_exec_aout; /* # a.out executables seen */ + int ps_exec_elf; /* # elf executables seen */ + int ps_exec_errors; /* # errors processing executables */ + int ps_exec_indeterminable; /* # unknown executables seen */ + int ps_samples_total; /* total number of samples processed */ + int ps_samples_skipped; /* #samples filtered out for any reason */ + int ps_samples_unknown_offset; /* #samples of rank 0 not in a map */ + int ps_samples_indeterminable; /* #samples in indeterminable images */ + int ps_callchain_dubious_frames;/* #dubious frame pointers seen */ + int ps_callchain_single_frames; /* #single frame seen */ + /* TODO: add stats for specific calltree error + */ +} pmcstat_stats; + + +/* + * Prototypes + */ + +static void pmcstat_image_determine_type(struct pmcstat_image *_image, + struct pmcstat_args *_a); +static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string + _path, int _iskernelmodule); +static void pmcstat_image_get_aout_params(struct pmcstat_image *_image, + struct pmcstat_args *_a); +static void pmcstat_image_get_elf_params(struct pmcstat_image *_image, + struct pmcstat_args *_a); +static void pmcstat_image_link(struct pmcstat_process *_pp, + struct pmcstat_image *_i, uintfptr_t _lpc); + +static void pmcstat_pmcid_add(pmc_id_t _pmcid, pmcstat_interned_string _name); +static void pmcstat_process_aout_exec(struct pmcstat_process *_pp, + struct pmcstat_image *_image, uintfptr_t 
_entryaddr, + struct pmcstat_args *_a); +static void pmcstat_process_elf_exec(struct pmcstat_process *_pp, + struct pmcstat_image *_image, uintfptr_t _entryaddr, + struct pmcstat_args *_a); +static void pmcstat_process_exec(struct pmcstat_process *_pp, + pmcstat_interned_string _path, uintfptr_t _entryaddr, + struct pmcstat_args *_ao); +static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid, + int _allocate); +static struct pmcstat_pcmap *pmcstat_process_find_map( + struct pmcstat_process *_p, uintfptr_t _pc); + +static int pmcstat_string_compute_hash(const char *_string); +static void pmcstat_string_initialize(void); +static pmcstat_interned_string pmcstat_string_intern(const char *_s); +static pmcstat_interned_string pmcstat_string_lookup(const char *_s); +static int pmcstat_string_lookup_hash(pmcstat_interned_string _is); +static void pmcstat_string_shutdown(void); +static const char *pmcstat_string_unintern(pmcstat_interned_string _is); + + +/* + * A simple implementation of interned strings. Each interned string + * is assigned a unique address, so that subsequent string compares + * can be done by a simple pointer comparision instead of using + * strcmp(). This speeds up hash table lookups and saves memory if + * duplicate strings are the norm. + */ +struct pmcstat_string { + LIST_ENTRY(pmcstat_string) ps_next; /* hash link */ + int ps_len; + int ps_hash; + char *ps_string; +}; + +static LIST_HEAD(,pmcstat_string) pmcstat_string_hash[PMCSTAT_NHASH]; + + +/* + * Block realloc items + */ +static void +pmcstat_growit(uint32_t item, uint32_t *count, uint32_t size, void **items) +{ +#define GROWIT_BLOCKSIZE 4 + uint32_t new_count; + + if (item < *count) + return; + + new_count = *count + max(item + 1 - *count, GROWIT_BLOCKSIZE); + *items = realloc(*items, new_count * size); +. 
if (*items == NULL) + errx(EX_SOFTWARE, "ERROR: out of memory"); + bzero((char *)*items + *count * size, (new_count - *count) * size); + *count = new_count; +} + +/* + * Compute a 'hash' value for a string. + */ + +static int +pmcstat_string_compute_hash(const char *s) +{ + int hash; + + for (hash = 0; *s; s++) + hash ^= *s; + + return (hash & PMCSTAT_HASH_MASK); +} + +/* + * Intern a copy of string 's', and return a pointer to the + * interned structure. + */ + +static pmcstat_interned_string +pmcstat_string_intern(const char *s) +{ + struct pmcstat_string *ps; + const struct pmcstat_string *cps; + int hash, len; + + if ((cps = pmcstat_string_lookup(s)) != NULL) + return (cps); + + hash = pmcstat_string_compute_hash(s); + len = strlen(s); + + if ((ps = malloc(sizeof(*ps))) == NULL) + err(EX_OSERR, "ERROR: Could not intern string"); + ps->ps_len = len; + ps->ps_hash = hash; + ps->ps_string = strdup(s); + LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next); + return ((pmcstat_interned_string) ps); +} + +static const char * +pmcstat_string_unintern(pmcstat_interned_string str) +{ + const char *s; + + s = ((const struct pmcstat_string *) str)->ps_string; + return (s); +} + +static pmcstat_interned_string +pmcstat_string_lookup(const char *s) +{ + struct pmcstat_string *ps; + int hash, len; + + hash = pmcstat_string_compute_hash(s); + len = strlen(s); + + LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next) + if (ps->ps_len == len && ps->ps_hash == hash && + strcmp(ps->ps_string, s) == 0) + return (ps); + return (NULL); +} + +static int +pmcstat_string_lookup_hash(pmcstat_interned_string s) +{ + const struct pmcstat_string *ps; + + ps = (const struct pmcstat_string *) s; + return (ps->ps_hash); +} + +/* + * Initialize the string interning facility. + */ + +static void +pmcstat_string_initialize(void) +{ + int i; + + for (i = 0; i < PMCSTAT_NHASH; i++) + LIST_INIT(&pmcstat_string_hash[i]); +} + +/* + * Destroy the string table, free'ing up space. 
+ */ + +static void +pmcstat_string_shutdown(void) +{ + int i; + struct pmcstat_string *ps, *pstmp; + + for (i = 0; i < PMCSTAT_NHASH; i++) + LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next, + pstmp) { + LIST_REMOVE(ps, ps_next); + free(ps->ps_string); + free(ps); + } +} + +/* + * Determine whether a given executable image is an A.OUT object, and + * if so, fill in its parameters from the text file. + * Sets image->pi_type. + */ + +static void +pmcstat_image_get_aout_params(struct pmcstat_image *image, + struct pmcstat_args *a) +{ + int fd; + ssize_t nbytes; + struct exec ex; + const char *path; + char buffer[PATH_MAX]; + + path = pmcstat_string_unintern(image->pi_execpath); + assert(path != NULL); + + if (image->pi_iskernelmodule) + errx(EX_SOFTWARE, "ERROR: a.out kernel modules are " + "unsupported \"%s\"", path); + + (void) snprintf(buffer, sizeof(buffer), "%s%s", + a->pa_fsroot, path); + + if ((fd = open(buffer, O_RDONLY, 0)) < 0 || + (nbytes = read(fd, &ex, sizeof(ex))) < 0) { + warn("WARNING: Cannot determine type of \"%s\"", path); + image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE; + if (fd != -1) + (void) close(fd); + return; + } + + (void) close(fd); + + if ((unsigned) nbytes != sizeof(ex) || + N_BADMAG(ex)) + return; + + image->pi_type = PMCSTAT_IMAGE_AOUT; + + /* TODO: the rest of a.out processing */ + + return; +} + +/* + * Helper function. + */ + +static int +pmcstat_symbol_compare(const void *a, const void *b) +{ + const struct pmcstat_symbol *sym1, *sym2; + + sym1 = (const struct pmcstat_symbol *) a; + sym2 = (const struct pmcstat_symbol *) b; + + if (sym1->ps_end <= sym2->ps_start) + return (-1); + if (sym1->ps_start >= sym2->ps_end) + return (1); + return (0); +} + +/* + * Map an address to a symbol in an image. 
+ */ + +static struct pmcstat_symbol * +pmcstat_symbol_search(struct pmcstat_image *image, uintfptr_t addr) +{ + struct pmcstat_symbol sym; + + if (image->pi_symbols == NULL) + return (NULL); + + sym.ps_name = NULL; + sym.ps_start = addr; + sym.ps_end = addr + 1; + + return (bsearch((void *) &sym, image->pi_symbols, + image->pi_symcount, sizeof(struct pmcstat_symbol), + pmcstat_symbol_compare)); +} + +/* + * Add the list of symbols in the given section to the list associated + * with the object. + */ +static void +pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e, + Elf_Scn *scn, GElf_Shdr *sh) +{ + int firsttime; + size_t n, newsyms, nshsyms, nfuncsyms; + struct pmcstat_symbol *symptr; + char *fnname; + GElf_Sym sym; + Elf_Data *data; + + if ((data = elf_getdata(scn, NULL)) == NULL) + return; + + /* + * Determine the number of functions named in this + * section. + */ + + nshsyms = sh->sh_size / sh->sh_entsize; + for (n = nfuncsyms = 0; n < nshsyms; n++) { + if (gelf_getsym(data, (int) n, &sym) != &sym) + return; + if (GELF_ST_TYPE(sym.st_info) == STT_FUNC) + nfuncsyms++; + } + + if (nfuncsyms == 0) + return; + + /* + * Allocate space for the new entries. + */ + firsttime = image->pi_symbols == NULL; + symptr = realloc(image->pi_symbols, + sizeof(*symptr) * (image->pi_symcount + nfuncsyms)); + if (symptr == image->pi_symbols) /* realloc() failed. */ + return; + image->pi_symbols = symptr; + + /* + * Append new symbols to the end of the current table. + */ + symptr += image->pi_symcount; + + for (n = newsyms = 0; n < nshsyms; n++) { + if (gelf_getsym(data, (int) n, &sym) != &sym) + return; + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; + + if (!firsttime && pmcstat_symbol_search(image, sym.st_value)) + continue; /* We've seen this symbol already. 
*/ + + if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name)) + == NULL) + continue; + + symptr->ps_name = pmcstat_string_intern(fnname); + symptr->ps_start = sym.st_value - image->pi_vaddr; + symptr->ps_end = symptr->ps_start + sym.st_size; + symptr++; + + newsyms++; + } + + image->pi_symcount += newsyms; + + assert(newsyms <= nfuncsyms); + + /* + * Return space to the system if there were duplicates. + */ + if (newsyms < nfuncsyms) + image->pi_symbols = realloc(image->pi_symbols, + sizeof(*symptr) * image->pi_symcount); + + /* + * Keep the list of symbols sorted. + */ + qsort(image->pi_symbols, image->pi_symcount, sizeof(*symptr), + pmcstat_symbol_compare); + + /* + * Deal with function symbols that have a size of 'zero' by + * making them extend to the next higher address. These + * symbols are usually defined in assembly code. + */ + for (symptr = image->pi_symbols; + symptr < image->pi_symbols + (image->pi_symcount - 1); + symptr++) + if (symptr->ps_start == symptr->ps_end) + symptr->ps_end = (symptr+1)->ps_start; +} + +/* + * Examine an ELF file to determine the size of its text segment. + * Sets image->pi_type if anything conclusive can be determined about + * this image. 
+ */ + +static void +pmcstat_image_get_elf_params(struct pmcstat_image *image, + struct pmcstat_args *a) +{ + int fd; + size_t i, nph, nsh; + const char *path, *elfbase; + uintfptr_t minva, maxva; + Elf *e; + Elf_Scn *scn; + GElf_Ehdr eh; + GElf_Phdr ph; + GElf_Shdr sh; + enum pmcstat_image_type image_type; + char buffer[PATH_MAX], *p, *q; + + assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN); + + image->pi_start = minva = ~(uintfptr_t) 0; + image->pi_end = maxva = (uintfptr_t) 0; + image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE; + image->pi_isdynamic = 0; + image->pi_dynlinkerpath = NULL; + image->pi_vaddr = 0; + + path = pmcstat_string_unintern(image->pi_execpath); + assert(path != NULL); + + /* + * Look for kernel modules under FSROOT/KERNELPATH/NAME, + * and user mode executable objects under FSROOT/PATHNAME. + */ + if (image->pi_iskernelmodule) + (void) snprintf(buffer, sizeof(buffer), "%s%s/%s", + a->pa_fsroot, a->pa_kernel, path); + else + (void) snprintf(buffer, sizeof(buffer), "%s%s", + a->pa_fsroot, path); + + e = NULL; + if ((fd = open(buffer, O_RDONLY, 0)) < 0 || + (e = elf_begin(fd, ELF_C_READ, NULL)) == NULL || + (elf_kind(e) != ELF_K_ELF)) { + warnx("WARNING: Cannot determine the type of \"%s\".", + buffer); + goto done; + } + + if (gelf_getehdr(e, &eh) != &eh) { + warnx("WARNING: Cannot retrieve the ELF Header for " + "\"%s\": %s.", buffer, elf_errmsg(-1)); + goto done; + } + + if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN && + !(image->pi_iskernelmodule && eh.e_type == ET_REL)) { + warnx("WARNING: \"%s\" is of an unsupported ELF type.", + buffer); + goto done; + } + + image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ? + PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64; + + /* + * Determine the virtual address where an executable would be + * loaded. Additionally, for dynamically linked executables, + * save the pathname to the runtime linker. 
+ */ + if (eh.e_type == ET_EXEC) { + if (elf_getphnum(e, &nph) == 0) { + warnx("WARNING: Could not determine the number of " + "program headers in \"%s\": %s.", buffer, + elf_errmsg(-1)); + goto done; + } + for (i = 0; i < eh.e_phnum; i++) { + if (gelf_getphdr(e, i, &ph) != &ph) { + warnx("WARNING: Retrieval of PHDR entry #%ju " + "in \"%s\" failed: %s.", (uintmax_t) i, + buffer, elf_errmsg(-1)); + goto done; + } + switch (ph.p_type) { + case PT_DYNAMIC: + image->pi_isdynamic = 1; + break; + case PT_INTERP: + if ((elfbase = elf_rawfile(e, NULL)) == NULL) { + warnx("WARNING: Cannot retrieve the " + "interpreter for \"%s\": %s.", + buffer, elf_errmsg(-1)); + goto done; + } + image->pi_dynlinkerpath = + pmcstat_string_intern(elfbase + + ph.p_offset); + break; + case PT_LOAD: + if (ph.p_offset == 0) + image->pi_vaddr = ph.p_vaddr; + break; + } + } + } + + /* + * Get the min and max VA associated with this ELF object. + */ + if (elf_getshnum(e, &nsh) == 0) { + warnx("WARNING: Could not determine the number of sections " + "for \"%s\": %s.", buffer, elf_errmsg(-1)); + goto done; + } + + for (i = 0; i < nsh; i++) { + if ((scn = elf_getscn(e, i)) == NULL || + gelf_getshdr(scn, &sh) != &sh) { + warnx("WARNING: Could not retrieve section header " + "#%ju in \"%s\": %s.", (uintmax_t) i, buffer, + elf_errmsg(-1)); + goto done; + } + if (sh.sh_flags & SHF_EXECINSTR) { + minva = min(minva, sh.sh_addr); + maxva = max(maxva, sh.sh_addr + sh.sh_size); + } + if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM) + pmcstat_image_add_symbols(image, e, scn, &sh); + } + + image->pi_start = minva; + image->pi_end = maxva; + image->pi_type = image_type; + image->pi_fullpath = pmcstat_string_intern(buffer); + + for (p = q = buffer; *p ; p++) { + if ( *p == '\\' || *p == '/' ) + q = p; + } + image->pi_name = pmcstat_string_intern(q); + done: + (void) elf_end(e); + if (fd >= 0) + (void) close(fd); + return; +} + +/* + * Given an image descriptor, determine whether it is an ELF, or AOUT. 
+ * If no handler claims the image, set its type to 'INDETERMINABLE'. + */ + +static void +pmcstat_image_determine_type(struct pmcstat_image *image, + struct pmcstat_args *a) +{ + assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN); + + /* Try each kind of handler in turn */ + if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) + pmcstat_image_get_elf_params(image, a); + if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) + pmcstat_image_get_aout_params(image, a); + + /* + * Otherwise, remember that we tried to determine + * the object's type and had failed. + */ + if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) + image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE; +} + +/* + * Locate an image descriptor given an interned path, adding a fresh + * descriptor to the cache if necessary. This function also finds a + * suitable name for this image's sample file. + * + * We defer filling in the file format specific parts of the image + * structure till the time we actually see a sample that would fall + * into this image. + */ + +static struct pmcstat_image * +pmcstat_image_from_path(pmcstat_interned_string internedpath, + int iskernelmodule) +{ + int hash; + struct pmcstat_image *pi; + + hash = pmcstat_string_lookup_hash(internedpath); + + /* First, look for an existing entry. */ + LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next) + if (pi->pi_execpath == internedpath && + pi->pi_iskernelmodule == iskernelmodule) + return (pi); + + /* + * Allocate a new entry and place it at the head of the hash + * and LRU lists. 
+ */ + pi = malloc(sizeof(*pi)); + if (pi == NULL) + return (NULL); + + pi->pi_type = PMCSTAT_IMAGE_UNKNOWN; + pi->pi_execpath = internedpath; + pi->pi_start = ~0; + pi->pi_end = 0; + pi->pi_entry = 0; + pi->pi_vaddr = 0; + pi->pi_isdynamic = 0; + pi->pi_iskernelmodule = iskernelmodule; + pi->pi_dynlinkerpath = NULL; + pi->pi_symbols = NULL; + pi->pi_symcount = 0; + + LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next); + + return (pi); +} + +/* + * Record the fact that PC values from 'start' to 'end' come from + * image 'image'. + */ + +static void +pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image, + uintfptr_t start) +{ + struct pmcstat_pcmap *pcm, *pcmnew; + uintfptr_t offset; + + assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN && + image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE); + + if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL) + err(EX_OSERR, "ERROR: Cannot create a map entry"); + + /* + * Adjust the map entry to only cover the text portion + * of the object. + */ + + offset = start - image->pi_vaddr; + pcmnew->ppm_lowpc = image->pi_start + offset; + pcmnew->ppm_highpc = image->pi_end + offset; + pcmnew->ppm_image = image; + + assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc); + + /* Overlapped mmap()'s are assumed to never occur. */ + TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next) + if (pcm->ppm_lowpc >= pcmnew->ppm_highpc) + break; + + if (pcm == NULL) + TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next); + else + TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next); +} + +/* + * Unmap images in the range [start..end) associated with process + * 'pp'. + */ + +static void +pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start, + uintfptr_t end) +{ + struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew; + + assert(pp != NULL); + assert(start < end); + + /* + * Cases: + * - we could have the range completely in the middle of an + * existing pcmap; in this case we have to split the pcmap + * structure into two (i.e., generate a 'hole'). 
+ * - we could have the range covering multiple pcmaps; these + * will have to be removed. + * - we could have either 'start' or 'end' falling in the + * middle of a pcmap; in this case shorten the entry. + */ + TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) { + assert(pcm->ppm_lowpc < pcm->ppm_highpc); + if (pcm->ppm_highpc <= start) + continue; + if (pcm->ppm_lowpc >= end) + return; + if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) { + /* + * The current pcmap is completely inside the + * unmapped range: remove it entirely. + */ + TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next); + free(pcm); + } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) { + /* + * Split this pcmap into two; curtail the + * current map to end at [start-1], and start + * the new one at [end]. + */ + if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL) + err(EX_OSERR, "ERROR: Cannot split a map " + "entry"); + + pcmnew->ppm_image = pcm->ppm_image; + + pcmnew->ppm_lowpc = end; + pcmnew->ppm_highpc = pcm->ppm_highpc; + + pcm->ppm_highpc = start; + + TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next); + + return; + } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc <= end) + pcm->ppm_highpc = start; + else if (pcm->ppm_lowpc >= start && pcm->ppm_highpc > end) + pcm->ppm_lowpc = end; + else + assert(0); + } +} + +/* + * Add a {pmcid,name} mapping. + */ + +static void +pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps) +{ + struct pmcstat_pmcrecord *pr; + int max_index = -1, name_index = -1; + + /* Replace an existing name for the PMC. */ + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) { + if (pr->pr_pmcid == pmcid) { + pr->pr_pmcname = ps; + return; + } + if (pmcstat_mergepmc && ps == pr->pr_pmcname) + name_index = pr->pr_index; + if ((int)pr->pr_index > max_index) + max_index = pr->pr_index; + } + max_index++; + /* + * Otherwise, allocate a new descriptor and create the + * appropriate directory to hold gmon.out files. 
+ */ +#define PCT_MAXPMCID 128 + if ((pr = malloc(sizeof(*pr))) == NULL || (name_index < 0 && max_index >= PCT_MAXPMCID)) + err(EX_OSERR, "ERROR: Cannot allocate pmc record"); + + pr->pr_pmcid = pmcid; + pr->pr_pmcname = ps; + pr->pr_index = name_index < 0 ? max_index : name_index; +#ifdef DEBUG + printf("adding pmcid=%lu index=%u name=%s\n", (unsigned long)pr->pr_pmcid, pr->pr_index, pmcstat_string_unintern(ps)); +#endif + LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); + pmcstat_npmcs = pr->pr_index + 1; +} + +static const char * +pmcstat_pmcid_index_to_name(unsigned int pmcid_index) +{ + struct pmcstat_pmcrecord *pr; + + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) + if (pr->pr_index == pmcid_index) + return pmcstat_string_unintern(pr->pr_pmcname); + + err(EX_OSERR, "ERROR: cannot find pmcid name"); + return NULL; +} + +static unsigned int pmcstat_pmcid_to_index(pmc_id_t pmcid) +{ + struct pmcstat_pmcrecord *pr; + + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) + if (pr->pr_pmcid == pmcid) + return pr->pr_index; + + err(EX_OSERR, "ERROR: invalid pmcid"); +} + +/* + * Associate an AOUT image with a process. + */ + +static void +pmcstat_process_aout_exec(struct pmcstat_process *pp, + struct pmcstat_image *image, uintfptr_t entryaddr, + struct pmcstat_args *a) +{ + (void) pp; + (void) image; + (void) entryaddr; + (void) a; + /* TODO Implement a.out handling */ +} + +/* + * Associate an ELF image with a process. + */ + +static void +pmcstat_process_elf_exec(struct pmcstat_process *pp, + struct pmcstat_image *image, uintfptr_t entryaddr, + struct pmcstat_args *a) +{ + uintmax_t libstart; + struct pmcstat_image *rtldimage; + + assert(image->pi_type == PMCSTAT_IMAGE_ELF32 || + image->pi_type == PMCSTAT_IMAGE_ELF64); + + /* Create a map entry for the base executable. 
*/ + pmcstat_image_link(pp, image, image->pi_vaddr); + + /* + * For dynamically linked executables we need to determine + * where the dynamic linker was mapped to for this process, + * Subsequent executable objects that are mapped in by the + * dynamic linker will be tracked by log events of type + * PMCLOG_TYPE_MAP_IN. + */ + + if (image->pi_isdynamic) { + + /* + * The runtime loader gets loaded just after the maximum + * possible heap address. Like so: + * + * [ TEXT DATA BSS HEAP -->*RTLD SHLIBS <--STACK] + * ^ ^ + * 0 VM_MAXUSER_ADDRESS + + * + * The exact address where the loader gets mapped in + * will vary according to the size of the executable + * and the limits on the size of the process'es data + * segment at the time of exec(). The entry address + * recorded at process exec time corresponds to the + * 'start' address inside the dynamic linker. From + * this we can figure out the address where the + * runtime loader's file object had been mapped to. + */ + rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath, + 0); + if (rtldimage == NULL) { + warnx("WARNING: Cannot find image for \"%s\".", + pmcstat_string_unintern(image->pi_dynlinkerpath)); + pmcstat_stats.ps_exec_errors++; + return; + } + + if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN) + pmcstat_image_get_elf_params(rtldimage, a); + + if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 && + rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) { + warnx("WARNING: rtld not an ELF object \"%s\".", + pmcstat_string_unintern(image->pi_dynlinkerpath)); + return; + } + + libstart = entryaddr - rtldimage->pi_entry; + pmcstat_image_link(pp, rtldimage, libstart); + } +} + +/* + * Find the process descriptor corresponding to a PID. If 'allocate' + * is zero, we return a NULL if a pid descriptor could not be found or + * a process descriptor process. If 'allocate' is non-zero, then we + * will attempt to allocate a fresh process descriptor. 
Zombie + * process descriptors are only removed if a fresh allocation for the + * same PID is requested. + */ + +static struct pmcstat_process * +pmcstat_process_lookup(pid_t pid, int allocate) +{ + uint32_t hash; + struct pmcstat_pcmap *ppm, *ppmtmp; + struct pmcstat_process *pp, *pptmp; + + hash = (uint32_t) pid & PMCSTAT_HASH_MASK; /* simplicity wins */ + + LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp) + if (pp->pp_pid == pid) { + /* Found a descriptor, check and process zombies */ + if (allocate && pp->pp_isactive == 0) { + /* remove maps */ + TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, + ppmtmp) { + TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next); + free(ppm); + } + /* remove process entry */ + LIST_REMOVE(pp, pp_next); + free(pp); + break; + } + return (pp); + } + + if (!allocate) + return (NULL); + + if ((pp = malloc(sizeof(*pp))) == NULL) + err(EX_OSERR, "ERROR: Cannot allocate pid descriptor"); + + pp->pp_pid = pid; + pp->pp_isactive = 1; + + TAILQ_INIT(&pp->pp_map); + + LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next); + return (pp); +} + +/* + * Associate an image and a process. 
+ */ + +static void +pmcstat_process_exec(struct pmcstat_process *pp, + pmcstat_interned_string path, uintfptr_t entryaddr, + struct pmcstat_args *a) +{ + struct pmcstat_image *image; + + if ((image = pmcstat_image_from_path(path, 0)) == NULL) { + pmcstat_stats.ps_exec_errors++; + return; + } + + if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) + pmcstat_image_determine_type(image, a); + + assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN); + + switch (image->pi_type) { + case PMCSTAT_IMAGE_ELF32: + case PMCSTAT_IMAGE_ELF64: + pmcstat_stats.ps_exec_elf++; + pmcstat_process_elf_exec(pp, image, entryaddr, a); + break; + + case PMCSTAT_IMAGE_AOUT: + pmcstat_stats.ps_exec_aout++; + pmcstat_process_aout_exec(pp, image, entryaddr, a); + break; + + case PMCSTAT_IMAGE_INDETERMINABLE: + pmcstat_stats.ps_exec_indeterminable++; + break; + + default: + err(EX_SOFTWARE, "ERROR: Unsupported executable type for " + "\"%s\"", pmcstat_string_unintern(path)); + } +} + + +/* + * Find the map entry associated with process 'p' at PC value 'pc'. 
+ */ + +static struct pmcstat_pcmap * +pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc) +{ + struct pmcstat_pcmap *ppm; + + TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) { + if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc) + return (ppm); + if (pc < ppm->ppm_lowpc) + return (NULL); + } + + return (NULL); +} + +static struct pmcstat_ctnode * +pmcstat_ctnode_allocate(struct pmcstat_image *image, uintfptr_t pc) +{ + struct pmcstat_ctnode *ct; + + if ((ct = malloc(sizeof(*ct))) == NULL) + err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); + + ct->pct_image = image; + ct->pct_func = pc; + + ct->pct_self_count_c = 0; + ct->pct_self_count = NULL; + + ct->pct_narc = 0; + ct->pct_arc_c = 0; + ct->pct_arc = NULL; + return (ct); +} + +static struct pmcstat_ctnode * +pmcstat_ctnode_hash_lookup_image(struct pmcstat_image *image) +{ + struct pmcstat_ctnode *ct; + struct pmcstat_ctnode_hash *h; + unsigned int i, hash; + uintfptr_t pc = image->pi_end; + + for (hash = i = 0; i < sizeof(uintfptr_t); i++) + hash += (pc >> i) & 0xFF; + + hash &= PMCSTAT_HASH_MASK; + + ct = NULL; + LIST_FOREACH(h, &pmcstat_ctnode_hash[hash], pch_next) + { + ct = h->pch_ctnode; + + assert(ct != NULL); + + if (ct->pct_image == image && ct->pct_func == pc) + return (ct); + } + + /* + * We haven't seen this (pmcid, pc) tuple yet, so allocate a + * new callgraph node and a new hash table entry for it. + */ + ct = pmcstat_ctnode_allocate(image, pc); + if ((h = malloc(sizeof(*h))) == NULL) + err(EX_OSERR, "ERROR: Could not allocate callgraph node"); + + h->pch_ctnode = ct; + LIST_INSERT_HEAD(&pmcstat_ctnode_hash[hash], h, pch_next); + + return (ct); +} + +/* + * Look for a callgraph node associated with pmc `pmcid' in the global + * hash table that corresponds to the given `pc' value in the process + * `pp'. 
+ */ +static struct pmcstat_ctnode * +pmcstat_ctnode_hash_lookup_pc(struct pmcstat_ctnode *parent, struct pmcstat_process *pp, uintfptr_t pc, int usermode, unsigned int pmcid_index) +{ + struct pmcstat_pcmap *ppm; + struct pmcstat_symbol *sym; + struct pmcstat_image *image; + struct pmcstat_ctnode *ct; + struct pmcstat_ctnode_hash *h; + uintfptr_t loadaddress; + unsigned int i, hash; + + ppm = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, pc); + if (ppm == NULL) + return (NULL); + + image = ppm->ppm_image; + + loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start; + pc -= loadaddress; /* Convert to an offset in the image. */ + + /* + * Try determine the function at this offset. If we can't + * find a function round leave the `pc' value alone. + */ + if ((sym = pmcstat_symbol_search(image, pc)) != NULL) + pc = sym->ps_start; + + for (hash = i = 0; i < sizeof(uintfptr_t); i++) + hash += (pc >> i) & 0xFF; + + hash &= PMCSTAT_HASH_MASK; + + ct = NULL; + LIST_FOREACH(h, &pmcstat_ctnode_hash[hash], pch_next) + { + ct = h->pch_ctnode; + + assert(ct != NULL); + + if (ct->pct_image == image && ct->pct_func == pc) + { + if (parent == NULL) { + parent = pmcstat_ctnode_hash_lookup_image(image); + if (parent == NULL) + err(EX_OSERR, "ERROR: Could not allocate default image node"); + } + for (i = 0; i < parent->pct_narc; i++) { + if (parent->pct_arc[i].pcta_child == ct) { + pmcstat_growit(pmcid_index, &parent->pct_arc[i].pcta_child_count_c, sizeof(uint32_t), (void **)&parent->pct_arc[i].pcta_child_count); + parent->pct_arc[i].pcta_child_count[pmcid_index]++; + return (ct); + } + } + pmcstat_growit(parent->pct_narc, &parent->pct_arc_c, sizeof(struct pmcstat_ctarc), (void **)&parent->pct_arc); + pmcstat_growit(pmcid_index, &parent->pct_arc[parent->pct_narc].pcta_child_count_c, sizeof(uint32_t), (void **)&parent->pct_arc[parent->pct_narc].pcta_child_count); + parent->pct_arc[parent->pct_narc].pcta_child_count[pmcid_index] = 1; + 
parent->pct_arc[parent->pct_narc++].pcta_child = ct; + return (ct); + } + } + + /* + * We haven't seen this (pmcid, pc) tuple yet, so allocate a + * new callgraph node and a new hash table entry for it. + */ + ct = pmcstat_ctnode_allocate(image, pc); + if ((h = malloc(sizeof(*h))) == NULL) + err(EX_OSERR, "ERROR: Could not allocate callgraph node"); + + h->pch_ctnode = ct; + LIST_INSERT_HEAD(&pmcstat_ctnode_hash[hash], h, pch_next); + + if (parent == NULL) { + parent = pmcstat_ctnode_hash_lookup_image(image); + if (parent == NULL) + err(EX_OSERR, "ERROR: Could not allocate default image node"); + } + pmcstat_growit(parent->pct_narc, &parent->pct_arc_c, sizeof(struct pmcstat_ctarc), (void **)&parent->pct_arc); + pmcstat_growit(pmcid_index, &parent->pct_arc[parent->pct_narc].pcta_child_count_c, sizeof(uint32_t), (void **)&parent->pct_arc[parent->pct_narc].pcta_child_count); + parent->pct_arc[parent->pct_narc].pcta_child_count[pmcid_index] = 1; + parent->pct_arc[parent->pct_narc++].pcta_child = ct; + + return (ct); +} + +/* + * Record a callchain for calltree. + */ + +static void +pmcstat_ct_record_callchain(struct pmcstat_process *pp, uint32_t pmcid, + uint32_t nsamples, uintfptr_t *cc, int usermode, struct pmcstat_args *a) +{ + uintfptr_t pc; + int n; + struct pmcstat_ctnode *parent, *child; + unsigned int pmcid_index; + +#ifdef DEBUG + struct pmcstat_symbol *sym; +#endif + + a = a; + pmcid_index = pmcstat_pmcid_to_index(pmcid); + + /* + * Find the callgraph node recorded in the global hash table for this pc. 
+ */ +#ifdef DEBUG +printf("pmcstat_ct_record_callchain: pmcid=%lu pmcid_index=%lu %lu samples to process\n", (unsigned long)pmcid, (unsigned long)pmcid_index, (unsigned long)nsamples); +#endif + n = (int)nsamples - 1; + do { + if (n < 0) + return; + + pc = cc[n--]; + parent = pmcstat_ctnode_hash_lookup_pc(NULL, pp, pc, usermode, pmcid_index); + if (parent == NULL) { +#ifdef DEBUG + printf("ERROR: cannot add parent pc=%p\n", (void *)pc); +#endif + } + } while (parent == NULL); + + +#ifdef DEBUG + sym = pmcstat_symbol_search(parent->pct_image, parent->pct_func); + if (sym) + printf("parent=%s (%p)\n", pmcstat_string_unintern(sym->ps_name), (void *)pc); +#endif + if (n < 0) { + pmcstat_growit(pmcid_index, &parent->pct_self_count_c, sizeof(uint32_t), (void **)&parent->pct_self_count); + parent->pct_self_count[pmcid_index]++; + pmcstat_stats.ps_callchain_single_frames++; + return; + } + + for ( ; n>=0 ; ) { + pc = cc[n--]; + child = pmcstat_ctnode_hash_lookup_pc(parent, pp, pc, usermode, pmcid_index); + if (child == NULL) { +#ifdef DEBUG + printf("ERROR: cannot add child (%p)\n", (void *)pc); +#endif + pmcstat_stats.ps_callchain_dubious_frames++; + continue; + } + +#ifdef DEBUG + sym = pmcstat_symbol_search(child->pct_image, child->pct_func); + if (sym) + printf("child=%s (%p)\n", pmcstat_string_unintern(sym->ps_name), (void *)pc); +#endif + if (n < 0) { + pmcstat_growit(pmcid_index, &child->pct_self_count_c, sizeof(uint32_t), (void **)&child->pct_self_count); + child->pct_self_count[pmcid_index]++; + break; + } + + parent = child; + } +} + +/* + * Print one calltree node. 
The output format is:
+ *
+ * ob=object
+ * fn=functions
+ * address nsamples
+ */
+static void
+pmcstat_ctnode_print(struct pmcstat_args *a, struct pmcstat_ctnode *ct)
+{
+	struct pmcstat_symbol *sym;
+	struct pmcstat_ctnode *child;
+	unsigned int i, j;
+
+	/* display ob only when changed from previous node
+	 */
+#if 0
+	if (pmcstat_previous_filename_printed !=
+	    ct->pct_image->pi_fullpath) {
+#endif
+		pmcstat_previous_filename_printed = ct->pct_image->pi_fullpath;
+		fprintf(a->pa_graphfile, "ob=%s\n",
+		    pmcstat_string_unintern(pmcstat_previous_filename_printed));
+#if 0
+	}
+#endif
+
+	if ( ct->pct_image->pi_end == ct->pct_func )
+		fprintf(a->pa_graphfile, "fn=%s\n", pmcstat_string_unintern(ct->pct_image->pi_name));
+	else
+	{
+		sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func);
+		if (sym)
+			fprintf(a->pa_graphfile, "fn=%s\n",
+			    pmcstat_string_unintern(sym->ps_name));
+		else
+			fprintf(a->pa_graphfile, "fn=%p\n",
+			    (void *) (ct->pct_image->pi_vaddr + ct->pct_func));
+	}
+#ifdef DEBUG
+printf("parent: %s cost=", pmcstat_string_unintern(sym->ps_name));
+#endif
+	fprintf(a->pa_graphfile, "*");
+	pmcstat_growit(pmcstat_npmcs-1, &ct->pct_self_count_c, sizeof(uint32_t), (void **)&ct->pct_self_count);
+	for (i = 0; i<pmcstat_npmcs; i++) {
+#ifdef DEBUG
+printf(" %u", ct->pct_self_count[i]);
+#endif
+		fprintf(a->pa_graphfile, " %u", ct->pct_self_count[i]);
+	}
+#ifdef DEBUG
+printf("\n");
+#endif
+	fprintf(a->pa_graphfile, "\n");
+
+	for (i=0 ; i<ct->pct_narc; i++) {
+		child = ct->pct_arc[i].pcta_child;
+
+		/* display cob only when changed from previous node
+		 */
+#if 0
+		if (pmcstat_previous_filename_printed != child->pct_image->pi_fullpath) {
+#endif
+			pmcstat_previous_filename_printed = child->pct_image->pi_fullpath;
+			fprintf(a->pa_graphfile, "cob=%s\n", pmcstat_string_unintern(pmcstat_previous_filename_printed));
+#if 0
+		}
+#endif
+
+		sym = pmcstat_symbol_search(child->pct_image, child->pct_func);
+		if (sym)
+			fprintf(a->pa_graphfile, "cfn=%s\n", pmcstat_string_unintern(sym->ps_name));
+		else
+			fprintf(a->pa_graphfile, 
"cfn=%p\n", (void *)(child->pct_image->pi_vaddr + child->pct_func)); + +#ifdef DEBUG +printf(" %s cost=", pmcstat_string_unintern(sym->ps_name)); +#endif + fprintf(a->pa_graphfile, "calls=1 *\n"); + fprintf(a->pa_graphfile, "*"); + pmcstat_growit(pmcstat_npmcs-1, &ct->pct_arc[i].pcta_child_count_c, sizeof(uint32_t), (void **)&ct->pct_arc[i].pcta_child_count); + for (j = 0; jpct_arc[i].pcta_child_count[j]); +#endif + fprintf(a->pa_graphfile, " %u", ct->pct_arc[i].pcta_child_count[j]); + } + fprintf(a->pa_graphfile, "\n"); +#ifdef DEBUG +printf("\n"); +#endif + } +} + +/* + * Printing a calltree (KCachegrind) for a PMC. + */ +static void +pmcstat_calltree_print(struct pmcstat_args *a) +{ + unsigned int n, i; + uint32_t nsamples_c; + uint32_t *nsamples; + struct pmcstat_ctnode_hash *pch; + + nsamples_c = 0; + nsamples = NULL; + pmcstat_growit(pmcstat_npmcs, &nsamples_c, sizeof(uint32_t), (void **)&nsamples); + + for (n = 0; n < PMCSTAT_NHASH; n++) + LIST_FOREACH(pch, &pmcstat_ctnode_hash[n], pch_next) { + pmcstat_growit(pmcstat_npmcs-1, &pch->pch_ctnode->pct_self_count_c, sizeof(uint32_t), (void **)&pch->pch_ctnode->pct_self_count); + for (i=0; ipch_ctnode->pct_self_count[i]; + } + + fprintf(a->pa_graphfile, + "version: 1\n"\ + "creator: pmcstat\n"\ + "positions: instr\n"\ + "events:"); + for (i=0; ipa_graphfile, " %s", pmcstat_pmcid_index_to_name(i)); + fprintf(a->pa_graphfile, "\nsummary:"); + for (i=0; ipa_graphfile, " %u", nsamples[i]); + fprintf(a->pa_graphfile, "\n\n"); + + pmcstat_previous_filename_printed = NULL; + for (n = 0; n < PMCSTAT_NHASH; n++) { + LIST_FOREACH(pch, &pmcstat_ctnode_hash[n], pch_next) { + pmcstat_ctnode_print(a, pch->pch_ctnode); + } + } + + free(nsamples); +} + +/* + * Convert a hwpmc(4) log to profile information. A system-wide + * callgraph is generated if FLAG_DO_CALLGRAPHS is set. gmon.out + * files usable by gprof(1) are created if FLAG_DO_GPROF is set. 
+ */ +int +pmcstat_ct_process_log(struct pmcstat_args *a) +{ + uint32_t cpu, cpuflags; + pid_t pid; + struct pmcstat_image *image; + struct pmcstat_process *pp, *ppnew; + struct pmcstat_pcmap *ppm, *ppmtmp; + struct pmclog_ev ev; + pmcstat_interned_string image_path; + + assert(a->pa_flags & FLAG_DO_ANALYSIS); + + if (elf_version(EV_CURRENT) == EV_NONE) + err(EX_UNAVAILABLE, "Elf library intialization failed"); + + while (pmclog_read(a->pa_logparser, &ev) == 0) { + assert(ev.pl_state == PMCLOG_OK); + + switch (ev.pl_type) { + case PMCLOG_TYPE_INITIALIZE: + if ((ev.pl_u.pl_i.pl_version & 0xFF000000) != + PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0) + warnx("WARNING: Log version 0x%x does not " + "match compiled version 0x%x.", + ev.pl_u.pl_i.pl_version, + PMC_VERSION_MAJOR); + break; + + case PMCLOG_TYPE_MAP_IN: + /* + * Introduce an address range mapping for a + * userland process or the kernel (pid == -1). + * + * We always allocate a process descriptor so + * that subsequent samples seen for this + * address range are mapped to the current + * object being mapped in. + */ + pid = ev.pl_u.pl_mi.pl_pid; + if (pid == -1) + pp = pmcstat_kernproc; + else + pp = pmcstat_process_lookup(pid, + PMCSTAT_ALLOCATE); + + assert(pp != NULL); + + image_path = pmcstat_string_intern(ev.pl_u.pl_mi. + pl_pathname); + image = pmcstat_image_from_path(image_path, pid == -1); + if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) + pmcstat_image_determine_type(image, a); + if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE) + pmcstat_image_link(pp, image, + ev.pl_u.pl_mi.pl_start); + break; + + case PMCLOG_TYPE_MAP_OUT: + /* + * Remove an address map. 
+ */ + pid = ev.pl_u.pl_mo.pl_pid; + if (pid == -1) + pp = pmcstat_kernproc; + else + pp = pmcstat_process_lookup(pid, 0); + + if (pp == NULL) /* unknown process */ + break; + + pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start, + ev.pl_u.pl_mo.pl_end); + break; + + case PMCLOG_TYPE_CALLCHAIN: + pmcstat_stats.ps_samples_total++; + + cpuflags = ev.pl_u.pl_cc.pl_cpuflags; + cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags); + + /* Filter on the CPU id. */ + if ((a->pa_cpumask & (1 << cpu)) == 0) { + pmcstat_stats.ps_samples_skipped++; + break; + } + + pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid, + PMCSTAT_ALLOCATE); + + pmcstat_ct_record_callchain(pp, + ev.pl_u.pl_cc.pl_pmcid, ev.pl_u.pl_cc.pl_npc, + ev.pl_u.pl_cc.pl_pc, + PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), a); + + break; + + case PMCLOG_TYPE_PMCALLOCATE: + /* + * Record the association pmc id between this + * PMC and its name. + */ + pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid, + pmcstat_string_intern(ev.pl_u.pl_a.pl_evname)); + break; + + case PMCLOG_TYPE_PROCEXEC: + + /* + * Change the executable image associated with + * a process. + */ + pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid, + PMCSTAT_ALLOCATE); + + /* delete the current process map */ + TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) { + TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next); + free(ppm); + } + + /* associate this process image */ + image_path = pmcstat_string_intern( + ev.pl_u.pl_x.pl_pathname); + assert(image_path != NULL); + pmcstat_process_exec(pp, image_path, + ev.pl_u.pl_x.pl_entryaddr, a); + break; + + case PMCLOG_TYPE_PROCEXIT: + + /* + * Due to the way the log is generated, the + * last few samples corresponding to a process + * may appear in the log after the process + * exit event is recorded. Thus we keep the + * process' descriptor and associated data + * structures around, but mark the process as + * having exited. 
+ */ + pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0); + if (pp == NULL) + break; + pp->pp_isactive = 0; /* mark as a zombie */ + break; + + case PMCLOG_TYPE_SYSEXIT: + pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0); + if (pp == NULL) + break; + pp->pp_isactive = 0; /* make a zombie */ + break; + + case PMCLOG_TYPE_PROCFORK: + + /* + * Allocate a process descriptor for the new + * (child) process. + */ + ppnew = + pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid, + PMCSTAT_ALLOCATE); + + /* + * If we had been tracking the parent, clone + * its address maps. + */ + pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0); + if (pp == NULL) + break; + TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next) + pmcstat_image_link(ppnew, ppm->ppm_image, + ppm->ppm_lowpc); + break; + + default: /* other types of entries are not relevant */ + break; + } + } + + if (ev.pl_state == PMCLOG_EOF) + return (PMCSTAT_FINISHED); + else if (ev.pl_state == PMCLOG_REQUIRE_DATA) + return (PMCSTAT_RUNNING); + + err(EX_DATAERR, "ERROR: event parsing failed (record %jd, " + "offset 0x%jx)", (uintmax_t) ev.pl_count + 1, ev.pl_offset); +} + +/* + * Public Interfaces. + */ + +/* + * Close a logfile, after first flushing all in-module queued data. + */ + +int +pmcstat_ct_close_log(struct pmcstat_args *a) +{ + if (pmc_flush_logfile() < 0 || + pmc_configure_logfile(-1) < 0) + err(EX_OSERR, "ERROR: logging failed"); + a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE); + return (a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING : + PMCSTAT_FINISHED); +} + + + +/* + * Open a log file, for reading or writing. + * + * The function returns the fd of a successfully opened log or -1 in + * case of failure. 
+ */ + +int +pmcstat_ct_open_log(const char *path, int mode) +{ + int error, fd; + size_t hlen; + const char *p, *errstr; + struct addrinfo hints, *res, *res0; + char hostname[MAXHOSTNAMELEN]; + + errstr = NULL; + fd = -1; + + /* + * If 'path' is "-" then open one of stdin or stdout depending + * on the value of 'mode'. + * + * If 'path' contains a ':' and does not start with a '/' or '.', + * and is being opened for writing, treat it as a "host:port" + * specification and open a network socket. + * + * Otherwise, treat 'path' as a file name and open that. + */ + if (path[0] == '-' && path[1] == '\0') + fd = (mode == PMCSTAT_OPEN_FOR_READ) ? 0 : 1; + else if (mode == PMCSTAT_OPEN_FOR_WRITE && path[0] != '/' && + path[0] != '.' && strchr(path, ':') != NULL) { + + p = strrchr(path, ':'); + hlen = p - path; + if (p == path || hlen >= sizeof(hostname)) { + errstr = strerror(EINVAL); + goto done; + } + + assert(hlen < sizeof(hostname)); + (void) strncpy(hostname, path, hlen); + hostname[hlen] = '\0'; + + (void) memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + if ((error = getaddrinfo(hostname, p+1, &hints, &res0)) != 0) { + errstr = gai_strerror(error); + goto done; + } + + fd = -1; + for (res = res0; res; res = res->ai_next) { + if ((fd = socket(res->ai_family, res->ai_socktype, + res->ai_protocol)) < 0) { + errstr = strerror(errno); + continue; + } + if (connect(fd, res->ai_addr, res->ai_addrlen) < 0) { + errstr = strerror(errno); + (void) close(fd); + fd = -1; + continue; + } + errstr = NULL; + break; + } + freeaddrinfo(res0); + + } else if ((fd = open(path, mode == PMCSTAT_OPEN_FOR_READ ? + O_RDONLY : (O_WRONLY|O_CREAT|O_TRUNC), + S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) + errstr = strerror(errno); + + done: + if (errstr) + errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path, + (mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"), + errstr); + + return (fd); +} + +/* + * Initialize module. 
+ */ + +void +pmcstat_ct_initialize_logging(struct pmcstat_args *a) +{ + int i; + + (void) a; + + /* use a convenient format for 'ldd' output */ + if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0) + err(EX_OSERR, "ERROR: Cannot setenv"); + + /* Initialize hash tables */ + pmcstat_string_initialize(); + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_INIT(&pmcstat_image_hash[i]); + LIST_INIT(&pmcstat_process_hash[i]); + } + + /* + * Create a fake 'process' entry for the kernel with pid -1. + * hwpmc(4) will subsequently inform us about where the kernel + * and any loaded kernel modules are mapped. + */ + if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1, + PMCSTAT_ALLOCATE)) == NULL) + err(EX_OSERR, "ERROR: Cannot initialize logging"); +} + +/* + * Shutdown module. + */ + +void +pmcstat_ct_shutdown_logging(struct pmcstat_args *a) +{ + int i; + struct pmcstat_image *pi, *pitmp; + struct pmcstat_process *pp, *pptmp; +#if 0 + struct pmcstat_ctnode_hash *pch, *pchtmp; +#endif + + pmcstat_calltree_print(a); + +#if 0 + /* + * Free memory. + */ + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_FOREACH_SAFE(pch, &pmcstat_ctnode_hash[i], pch_next, + pchtmp) { + pmcstat_ctnode_free(pch->pch_ctnode); + free(pch); + } + } +#endif + + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp) + { + if (pi->pi_symbols) + free(pi->pi_symbols); + + LIST_REMOVE(pi, pi_next); + free(pi); + } + + LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next, + pptmp) { + LIST_REMOVE(pp, pp_next); + free(pp); + } + } + + pmcstat_string_shutdown(); + + /* + * Print errors unless -q was specified. Print all statistics + * if verbosity > 1. 
+ */ +#define PRINT(N,V,A) do { \ + if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2) \ + (void) fprintf((A)->pa_printfile, " %-40s %d\n",\ + N, pmcstat_stats.ps_##V); \ + } while (0) + + if (a->pa_verbosity >= 1) { + (void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n"); + PRINT("#exec/a.out", exec_aout, a); + PRINT("#exec/elf", exec_elf, a); + PRINT("#exec/unknown", exec_indeterminable, a); + PRINT("#exec handling errors", exec_errors, a); + PRINT("#samples/total", samples_total, a); + PRINT("#samples/unclaimed", samples_unknown_offset, a); + PRINT("#samples/unknown-object", samples_indeterminable, a); + PRINT("#callchain/dubious-frames", callchain_dubious_frames, a); + PRINT("#callchain/single-frames", callchain_single_frames, a); + } + +} +