#!/usr/bin/env python3
"""buildmon.py — run a build command under DTrace and plot syscall statistics.

Runs the given command via dtrace(1) with JSON output (`-x oformat=json`),
prints per-probe messages (execve events in the target's progeny) to stdout,
and renders two PDFs:

  * buildmon_counts.pdf — bar chart of per-syscall counts
  * buildmon_times.pdf  — one latency histogram per (execname, syscall) pair
"""
import tempfile as tf
import os
import sys
import json
import subprocess as sp
import argparse as ap
import matplotlib
from matplotlib.backends.backend_pdf import PdfPages

# Select the non-interactive backend before pyplot is imported; pyplot picks
# its backend at import time.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# D script: log execve calls in the target's progeny, count every syscall,
# and build per-(execname, syscall) latency histograms in microseconds.
DTRACE_SCRIPT = """
syscall::execve:entry
/progenyof($target)/
{
    printf("%Y|%s|A|%s|FF0000", walltimestamp, execname, stringof(copyinstr(arg0)));
}

syscall:::entry
/progenyof($target)/
{
    self->ts = timestamp;
    @counts[probefunc] = count();
}

syscall:::return
/self->ts/
{
    @times[execname, probefunc] = quantize((timestamp - self->ts)/1000);
    self->ts = 0;
}

END
{
    printa(@counts);
    printa(@times);
}
"""


class DTraceAggregation:
    """A named DTrace aggregation split into parallel key/value lists."""

    keys: list
    data: list

    def __init__(self, name, mixed_data) -> None:
        # mixed_data is a sequence of (key-list, value) pairs. The list
        # comprehensions already build fresh lists, so the extra .copy()
        # calls the original made were redundant.
        self.name = name
        self.keys = [d[0] for d in mixed_data]
        self.data = [d[1] for d in mixed_data]


def decode_aggregation(agg):
    """Convert one JSON aggregation record into a DTraceAggregation.

    Each entry in ``agg["agg_data"]`` carries its keys plus either a scalar
    ``"count"`` (from count()) or a list of ``"buckets"`` (from quantize()).
    """
    name = agg["agg_name"]
    data = []
    for entry in agg["agg_data"]:  # aggregation buckets
        # Fresh list per entry; avoids the shared-buffer copy()/clear() dance.
        k_data = [k["value"] for k in entry["keys"] if "value" in k]
        if "count" in entry:
            data.append((k_data, entry["count"]))
        elif "buckets" in entry:
            data.append((k_data, entry["buckets"]))
    return DTraceAggregation(name, data)


def warn(s):
    """Print a message prefixed with the program name."""
    print(f"{sys.argv[0]}: {s}")


def die(s):
    """Print a message and terminate with exit status 1."""
    warn(s)
    sys.exit(1)


def process_aggregation(agg):
    """Render one decoded aggregation into the matching open PdfPages handle.

    Relies on the module-level ``counts_pdf``/``times_pdf`` handles opened in
    the ``__main__`` block.
    """
    if agg.name == "counts":
        fig = plt.figure()
        names = [x[0] for x in agg.keys]
        plt.barh(names, agg.data)
        plt.ylabel("system call")
        plt.xlabel("count")
        plt.title("Counting up system calls")
        counts_pdf.savefig(fig)
        plt.close()
    elif agg.name == "times":
        for k, b in zip(agg.keys, agg.data):
            fig = plt.figure()
            values = [x["value"] for x in b]
            counts = [x["count"] for x in b]
            min_x, max_x, min_y, max_y = get_axes(values, counts)
            if max_x == 0 or max_y == 0:
                # Empty histogram (all buckets zero): nothing to plot.
                # The original asserted here, which crashed the whole run
                # (and asserts vanish under python -O); skip instead.
                plt.close()
                continue
            plt.axis([min_x - 1, max_x + 1, min_y, max_y])
            plt.bar(values, counts, width=0.8)
            plt.xlabel("length (in microseconds)")
            plt.ylabel(f"count ({k[0]}: {k[1]})")
            plt.title(f"Time spent in system call ({k[0]}: {k[1]})")
            times_pdf.savefig(fig)
            plt.close()


def get_axes(values, counts):
    """Compute (min_x, max_x, min_y, max_y) trimming zero-count edge buckets.

    ``min_x`` is the first bucket value with a non-zero count and ``max_x``
    the last one; ``min_y``/``max_y`` bound the counts. Returns all zeros
    when every count is zero.
    """
    min_x = max_x = min_y = max_y = 0
    prev_c = prev_v = None
    for v, c in zip(values, counts):
        min_y = min(min_y, c)
        max_y = max(max_y, c)
        if c == 0 and min_x == 0:
            # Still in the leading run of empty buckets.
            continue
        if min_x == 0:
            min_x = v
        if prev_c is not None and prev_c != 0 and c == 0:
            # Non-zero -> zero transition: remember the last populated bucket.
            max_x = prev_v
        prev_c = c
        prev_v = v
    # Bug fix: if the histogram ends on a non-zero bucket the loop never sees
    # a non-zero -> zero transition and max_x was left at 0, tripping the
    # caller's sanity check. Fall back to the last bucket seen.
    if max_x == 0 and prev_c is not None and prev_c != 0:
        max_x = prev_v
    return (min_x, max_x, min_y, max_y)


if __name__ == "__main__":
    # Boilerplate
    dtrace_cmd = "dtrace"
    parser = ap.ArgumentParser(
        prog="buildmon.py",
        description="Runs a build command and produces a Gource file",
        epilog="Please report any bugs to Domagoj Stolfa ",
    )
    parser.add_argument("-c", "--command", help="Specify the build command to run")
    parser.add_argument("-D", "--directory", help="Specify the directory in which to run the command")
    parser.add_argument("-o", "--outfile", help="Specify the output file for stdout")
    args = parser.parse_args()

    work_directory = args.directory
    command = args.command
    if command is None:
        die("expected command")
    if args.outfile:
        sys.stdout = open(args.outfile, "w")
    if work_directory is not None:
        os.chdir(work_directory)

    with tf.NamedTemporaryFile() as tmp:
        # This is where oformat sees use: dtrace writes JSON to tmp.name.
        proc = sp.Popen(
            [dtrace_cmd, "-x", "oformat=json", "-c", command, "-o", tmp.name, "-n", DTRACE_SCRIPT],
            stdout=sp.PIPE,
            stderr=sp.PIPE,
        )
        # Get stdout and stderr. This implies that we are waiting for the
        # program to finish to actually gather all of our data. In order to
        # process incomplete JSON, one would need to manually readline and
        # wait for flushes and then handle the probe array 1 by 1.
        out, err = proc.communicate()
        if proc.returncode != 0:
            die(err.decode())
        raw_data = tmp.read()

    data = json.loads(raw_data)
    probes = data["dtrace"]["probes"]

    times_pdf = PdfPages("buildmon_times.pdf")
    counts_pdf = PdfPages("buildmon_counts.pdf")
    # NOTE: renamed the loop variable from "p" so it no longer shadows the
    # Popen handle above.
    for probe in probes:
        if "output" not in probe:
            continue
        for o in probe["output"]:
            if "message" in o:
                print(o["message"])
            if "agg_data" in o:
                agg = decode_aggregation(o)
                process_aggregation(agg)
    times_pdf.close()
    counts_pdf.close()