#!/usr/bin/env python

# Copyright (c) 2016 Jonathan Anderson. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

"""
Parser for the include graph of BSD makefiles.

Reads BSD makefiles and constructs a graph of file inclusions. This graph
incorporates knowledge of the order in which these files are included and
can show how metadata such as variables propagate through the graph.

Usage:
    graph-makeincludes [options] <filename> [<filename> ...]
    graph-makeincludes -h | --help
    graph-makeincludes --version

Options:
    -h --help                   Show this screen.
    --version                   Show version.
    --filter-singletons         Filter out nodes with no edges in or out
    --filter-unconnected        Filter out nodes not connected to nodes
                                with metadata
    --metadata=<list>           Metadata to search for
                                [default: var:CC,var:LOCALBASE]
    -o <file>, --output=<file>  Output file, or '-' for stdout [default: -].
    <filename>                  Input file(s), in BSD make form
"""

import collections
import itertools
import os
import re
import sys


# Matches `.include <bsd.prog.mk>` / `.sinclude "foo.mk"` style directives
# and captures the included path.
INCLUDE = re.compile(r'^\.[a-z]?include [<"](.*)[>"]')


class File(object):
    """
    A file in the Makefile graph contains zero or more `.include` lines
    and also keeps track of where it is `.include`d.
    """
    def __init__(self):
        # line number -> Line, created lazily on first access
        self.lines = collections.defaultdict(Line)
        # Line objects (in OTHER files) that `.include` this file
        self.edges_out = set()


class Line(object):
    """
    A `.include` line includes a source makefile into the current file.
    """
    def __init__(self):
        # filenames this line `.include`s
        self.edges_in = set()
        # metadata names this line defines (e.g. a variable assignment)
        self.definitions = set()
        # metadata name -> bool: does the metadata reach this line?
        self.metadata = collections.defaultdict(bool)


def parse(filenames, metadata):
    """
    Look through a set of Makefiles to find "interesting" lines
    (e.g., those that include other Makefiles or define key variables).

    Args:
        filenames: iterable of makefile paths to scan.
        metadata: dict of metadata name -> compiled regex that recognises
            a definition of that metadata on a makefile line.

    Returns:
        defaultdict mapping filename -> File.  Files that are included but
        never parsed still appear (with empty `lines`).
    """
    files = collections.defaultdict(File)

    for filename in filenames:
        dirname = os.path.dirname(filename)

        with open(filename) as makefile:
            for (line_number, line) in enumerate(makefile, start=1):
                # Fetch the file/line node lazily so that uninteresting
                # lines never create spurious objects in the defaultdicts.
                def get_node(filename=filename, number=line_number):
                    return files[filename].lines[number]

                match = INCLUDE.match(line)
                if match:
                    node = get_node()

                    (included,) = match.groups()
                    included = included.replace('${.CURDIR}/', '')

                    # Paths still containing make variables cannot be
                    # resolved relative to this file's directory.
                    if not included.startswith('$'):
                        included = os.path.join(dirname, included)

                    included = os.path.normpath(included)

                    node.edges_in.add(included)
                    files[included].edges_out.add(node)

                for (name, pattern) in metadata.items():
                    if pattern.match(line):
                        node = get_node()
                        node.definitions.add(name)
                        node.metadata[name] = True

    return files


def filter_singletons(files):
    """
    Filter out singleton files (files that neither include nor are
    included by other files).
    """
    keep = set()

    for (name, f) in files.items():
        if f.edges_out:
            keep.add(name)
        elif any(line.edges_in for line in f.lines.values()):
            keep.add(name)

    return {name: f for (name, f) in files.items() if name in keep}


def filter_unconnected(files):
    """
    Keep only files connected (via include edges, in either direction)
    to a file that carries metadata.
    """
    have_metadata = set(
        name for (name, f) in files.items()
        if any(any(line.metadata.values()) for line in f.lines.values())
    )

    # edges_out holds Line objects, not filenames, so build a reverse map
    # from each Line back to the file that owns it.  (The original code
    # tested `Line in files`, which was always false, so the downstream
    # walk never kept anything.)
    owner = {}
    for (name, f) in files.items():
        for line in f.lines.values():
            owner[line] = name

    keep = set()

    def keep_ancestors(f):
        # Walk the files this one includes, transitively.
        for line in f.lines.values():
            for included in line.edges_in:
                if included not in keep and included in files:
                    keep.add(included)
                    keep_ancestors(files[included])

    def keep_descendents(f):
        # Walk the include sites of this file, transitively.
        for node in f.edges_out:
            name = owner.get(node)
            if name is None or name in keep:
                continue
            keep.add(name)
            keep_descendents(files[name])

    for name in have_metadata:
        if name in keep:
            continue

        keep.add(name)
        keep_ancestors(files[name])
        keep_descendents(files[name])

    return {name: files[name] for name in keep}


def propagate_metadata(files, metadata):
    """
    Propagate all metadata through the graph, repeating until we can
    walk through the entire graph without doing any propagation.
    """
    def propagate(source, dest):
        """Copy metadata flags from source to dest; True if anything new."""
        changed = False
        for name in metadata:
            if source.metadata[name] and not dest.metadata[name]:
                dest.metadata[name] = True
                changed = True
        return changed

    dirty = True
    while dirty:
        dirty = False

        for f in files.values():
            previous = None

            # Metadata flows from each line to the next within a file...
            for number in sorted(f.lines.keys()):
                node = f.lines[number]
                if previous is not None:
                    dirty |= propagate(previous, node)
                previous = node

            # ... and from the file's last line to every include site.
            if previous is not None:
                for recipient in f.edges_out:
                    dirty |= propagate(previous, recipient)


def attribute_list(attrs):
    """
    Convert a Python dictionary to a GraphViz dot attribute list.
    Entries whose value is None are omitted.
    """
    return '[ %s ]' % ', '.join(
        '%s="%s"' % (key, value)
        for (key, value) in attrs.items()
        if value is not None
    )


def _write_legend(outfile, metadata, colours):
    """Emit the legend cluster: one filled swatch per metadata name."""
    outfile.write('''
digraph {
  rankdir = TB;

  subgraph cluster_legend {
    label = "Legend";
    rankdir = TB;
    edge [ style = invis; ];
''')

    prev = None
    for name in sorted(metadata):
        outfile.write('    "%s" %s;\n' % (name, attribute_list({
            'style': 'filled',
            'fillcolor': colours[name],
        })))

        # Invisible edges force the legend into a vertical column.
        if prev:
            outfile.write('    "%s" -> "%s";\n' % (prev, name))
        prev = name

    outfile.write('\n  }\n')


def _write_file(outfile, filename, f, colours):
    """
    Emit one file: a plain box if it has no interesting lines (it is only
    a source of include material), otherwise a cluster with one node per
    line, chained with dotted edges in line order.
    """
    lines = sorted(f.lines.items())

    if not lines:
        outfile.write('  "%s" %s;\n' % (filename, attribute_list({
            'label': filename,
            'shape': 'rectangle',
        })))
        return

    # dot identifiers cannot contain these path characters.
    safe_name = re.sub(r'[+.$/-]', '_', filename)

    outfile.write('  subgraph cluster_%s {\n' % safe_name)
    outfile.write('    rankdir = TB;\n')
    outfile.write('    label = "%s";\n' % filename)

    previous = None
    prev_meta = False

    for (line_number, node) in lines:
        name = '%s:%d' % (filename, line_number)

        fills = [colours[meta] for (meta, val) in node.metadata.items() if val]

        shape = None
        if node.definitions:
            # A definition immediately downstream of other metadata gets
            # a distinctive shape and an orange outline.
            shape = 'tripleoctagon' if prev_meta else 'doublecircle'

        attrs = {
            'fillcolor': ':'.join(fills) if fills else 'white',
            'label': line_number,
            'shape': shape,
            'style': 'filled',
        }
        if node.definitions and prev_meta:
            # Must be applied AFTER attrs is built; the original set it
            # first and the fresh dict silently discarded it.
            attrs['color'] = 'orange'

        outfile.write('    "%s" %s;\n' % (name, attribute_list(attrs)))

        if previous:
            outfile.write('    "%s" -> "%s" %s;\n' % (
                previous, name, attribute_list({'style': 'dotted'})))

        previous = name
        prev_meta = any(node.metadata.values())

    outfile.write('  }\n')


def _write_include_edges(outfile, files):
    """
    Join the clusters up: draw an edge from the end of each included file
    to the line that includes it.  This runs as a separate pass so that
    no node is accidentally defined inside the wrong cluster.
    """
    for (filename, f) in files.items():
        for (line_number, node) in f.lines.items():
            name = '%s:%d' % (filename, line_number)

            for source_filename in node.edges_in:
                if source_filename not in files:
                    continue  # filtered out, or an unresolved $-path

                source_lines = sorted(files[source_filename].lines.keys())
                if source_lines:
                    source = '%s:%d' % (source_filename, source_lines[-1])
                else:
                    source = source_filename

                outfile.write('  "%s" -> "%s";\n' % (source, name))


def write_graph(outfile, files, metadata, colours):
    """Write the complete GraphViz dot rendering of the include graph."""
    _write_legend(outfile, metadata, colours)

    for (filename, f) in files.items():
        _write_file(outfile, filename, f, colours)

    _write_include_edges(outfile, files)

    outfile.write('}\n')


def main():
    # docopt is a third-party module used only for the command line;
    # importing it lazily keeps the graph helpers importable without it.
    import docopt
    args = docopt.docopt(__doc__)

    filenames = args['<filename>']
    print('Parsing %d files' % len(filenames))

    outname = args['--output']
    outfile = open(outname, 'w') if outname != '-' else sys.stdout

    # Set up metadata: compile one matcher per requested rule/variable.
    metadata = {}
    for item in args['--metadata'].split(','):
        if item.startswith('rule:'):
            name = item[5:]
            metadata[name] = re.compile(r'^%s:' % name)

        elif item.startswith('var:'):
            name = item[4:]
            metadata[name] = re.compile(r'^%s((=)|(\t.*=))' % name)

        else:
            sys.stderr.write("Error: unknown metadata type: '%s'\n" % item)
            sys.exit(1)

    # Assign each metadata name a pastel fill colour, cycling if needed.
    pastels = ['/pastel28/%d' % x for x in range(1, 9)]
    colours = dict(zip(metadata.keys(), itertools.cycle(pastels)))

    # Parse the makefiles and propagate any metadata that we care about.
    files = parse(filenames, metadata)
    propagate_metadata(files, metadata)

    if args['--filter-singletons']:
        files = filter_singletons(files)

    if args['--filter-unconnected']:
        files = filter_unconnected(files)

    write_graph(outfile, files, metadata, colours)

    if outfile is not sys.stdout:
        outfile.close()


if __name__ == '__main__':
    main()