#!/usr/bin/env python3
"""buildmon.py — run a build command under DTrace and plot syscall statistics.

Runs the given command via dtrace(1) with JSON output (`-x oformat=json`),
prints per-probe messages (execve events in the target's progeny) to stdout,
and renders two PDFs:

  * buildmon_counts.pdf — bar chart of per-syscall counts
  * buildmon_times.pdf  — one latency histogram per (execname, syscall) pair
"""
import tempfile as tf
import os
import sys
import json
import subprocess as sp
import argparse as ap
import matplotlib
from matplotlib.backends.backend_pdf import PdfPages

# Select the non-interactive backend before pyplot is imported; pyplot picks
# its backend at import time.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# D script: log execve calls in the target's progeny, count every syscall,
# and build per-(execname, syscall) latency histograms in microseconds.
DTRACE_SCRIPT = """
syscall::execve:entry
/progenyof($target)/
{
    printf("%Y|%s|A|%s|FF0000", walltimestamp, execname, stringof(copyinstr(arg0)));
}

syscall:::entry
/progenyof($target)/
{
    self->ts = timestamp;
    @counts[probefunc] = count();
}

syscall:::return
/self->ts/
{
    @times[execname, probefunc] = quantize((timestamp - self->ts)/1000);
    self->ts = 0;
}

END
{
    printa(@counts);
    printa(@times);
}
"""


class DTraceAggregation:
    """A named DTrace aggregation split into parallel key/value lists."""

    keys: list
    data: list

    def __init__(self, name, mixed_data) -> None:
        # mixed_data is a sequence of (key-list, value) pairs. The list
        # comprehensions already build fresh lists, so the extra .copy()
        # calls the original made were redundant.
        self.name = name
        self.keys = [d[0] for d in mixed_data]
        self.data = [d[1] for d in mixed_data]


def decode_aggregation(agg):
    """Convert one JSON aggregation record into a DTraceAggregation.

    Each entry in ``agg["agg_data"]`` carries its keys plus either a scalar
    ``"count"`` (from count()) or a list of ``"buckets"`` (from quantize()).
    """
    name = agg["agg_name"]
    data = []
    for entry in agg["agg_data"]:  # aggregation buckets
        # Fresh list per entry; avoids the shared-buffer copy()/clear() dance.
        k_data = [k["value"] for k in entry["keys"] if "value" in k]
        if "count" in entry:
            data.append((k_data, entry["count"]))
        elif "buckets" in entry:
            data.append((k_data, entry["buckets"]))
    return DTraceAggregation(name, data)


def warn(s):
    """Print a message prefixed with the program name."""
    print(f"{sys.argv[0]}: {s}")


def die(s):
    """Print a message and terminate with exit status 1."""
    warn(s)
    sys.exit(1)


def process_aggregation(agg):
    """Render one decoded aggregation into the matching open PdfPages handle.

    Relies on the module-level ``counts_pdf``/``times_pdf`` handles opened in
    the ``__main__`` block.
    """
    if agg.name == "counts":
        fig = plt.figure()
        names = [x[0] for x in agg.keys]
        plt.barh(names, agg.data)
        plt.ylabel("system call")
        plt.xlabel("count")
        plt.title("Counting up system calls")
        counts_pdf.savefig(fig)
        plt.close()
    elif agg.name == "times":
        for k, b in zip(agg.keys, agg.data):
            fig = plt.figure()
            values = [x["value"] for x in b]
            counts = [x["count"] for x in b]
            min_x, max_x, min_y, max_y = get_axes(values, counts)
            if max_x == 0 or max_y == 0:
                # Empty histogram (all buckets zero): nothing to plot.
                # The original asserted here, which crashed the whole run
                # (and asserts vanish under python -O); skip instead.
                plt.close()
                continue
            plt.axis([min_x - 1, max_x + 1, min_y, max_y])
            plt.bar(values, counts, width=0.8)
            plt.xlabel("length (in microseconds)")
            plt.ylabel(f"count ({k[0]}: {k[1]})")
            plt.title(f"Time spent in system call ({k[0]}: {k[1]})")
            times_pdf.savefig(fig)
            plt.close()


def get_axes(values, counts):
    """Compute (min_x, max_x, min_y, max_y) trimming zero-count edge buckets.

    ``min_x`` is the first bucket value with a non-zero count and ``max_x``
    the last one; ``min_y``/``max_y`` bound the counts. Returns all zeros
    when every count is zero.
    """
    min_x = max_x = min_y = max_y = 0
    prev_c = prev_v = None
    for v, c in zip(values, counts):
        min_y = min(min_y, c)
        max_y = max(max_y, c)
        if c == 0 and min_x == 0:
            # Still in the leading run of empty buckets.
            continue
        if min_x == 0:
            min_x = v
        if prev_c is not None and prev_c != 0 and c == 0:
            # Non-zero -> zero transition: remember the last populated bucket.
            max_x = prev_v
        prev_c = c
        prev_v = v
    # Bug fix: if the histogram ends on a non-zero bucket the loop never sees
    # a non-zero -> zero transition and max_x was left at 0, tripping the
    # caller's sanity check. Fall back to the last bucket seen.
    if max_x == 0 and prev_c is not None and prev_c != 0:
        max_x = prev_v
    return (min_x, max_x, min_y, max_y)


if __name__ == "__main__":
    # Boilerplate
    dtrace_cmd = "dtrace"
    parser = ap.ArgumentParser(
        prog="buildmon.py",
        description="Runs a build command and produces a Gource file",
        epilog="Please report any bugs to Domagoj Stolfa ",
    )
    parser.add_argument("-c", "--command", help="Specify the build command to run")
    parser.add_argument("-D", "--directory", help="Specify the directory in which to run the command")
    parser.add_argument("-o", "--outfile", help="Specify the output file for stdout")
    args = parser.parse_args()

    work_directory = args.directory
    command = args.command
    if command is None:
        die("expected command")
    if args.outfile:
        sys.stdout = open(args.outfile, "w")
    if work_directory is not None:
        os.chdir(work_directory)

    with tf.NamedTemporaryFile() as tmp:
        # This is where oformat sees use: dtrace writes JSON to tmp.name.
        proc = sp.Popen(
            [dtrace_cmd, "-x", "oformat=json", "-c", command, "-o", tmp.name, "-n", DTRACE_SCRIPT],
            stdout=sp.PIPE,
            stderr=sp.PIPE,
        )
        # Get stdout and stderr. This implies that we are waiting for the
        # program to finish to actually gather all of our data. In order to
        # process incomplete JSON, one would need to manually readline and
        # wait for flushes and then handle the probe array 1 by 1.
        out, err = proc.communicate()
        if proc.returncode != 0:
            die(err.decode())
        raw_data = tmp.read()

    data = json.loads(raw_data)
    probes = data["dtrace"]["probes"]

    times_pdf = PdfPages("buildmon_times.pdf")
    counts_pdf = PdfPages("buildmon_counts.pdf")
    # NOTE: renamed the loop variable from "p" so it no longer shadows the
    # Popen handle above.
    for probe in probes:
        if "output" not in probe:
            continue
        for o in probe["output"]:
            if "message" in o:
                print(o["message"])
            if "agg_data" in o:
                agg = decode_aggregation(o)
                process_aggregation(agg)
    times_pdf.close()
    counts_pdf.close()