#!/usr/bin/env python

# Copyright (c) 2016 Jonathan Anderson. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

"""
Parser for the include graph of BSD makefiles.

Reads BSD makefiles and constructs a graph of file inclusions. This graph
incorporates knowledge of the order in which these files are included and
can show how metadata such as variables propagate through the graph.

Usage:
    graph-makeincludes [options] <filename> [<filename> ...]
    graph-makeincludes -h | --help
    graph-makeincludes --version

Options:
    -h --help                   Show this screen.
    --version                   Show version.
    --filter-singletons         Filter out nodes with no edges in or out
    --filter-unconnected        Filter out nodes not connected to nodes
                                with metadata
    --metadata=<list>           Metadata to search for
                                [default: var:CC,var:LOCALBASE]
    -o <file>, --output=<file>  Output file, or '-' for stdout [default: -].
    <filename>                  Input file(s), in BSD make form
"""

import collections
import itertools
import os
import re
import sys


# Matches `.include <bsd.prog.mk>` / `.sinclude "foo.mk"` style directives
# and captures the included path.
INCLUDE = re.compile(r'^\.[a-z]?include [<"](.*)[>"]')


class File(object):
    """
    A file in the Makefile graph contains zero or more `.include` lines
    and also keeps track of where it is `.include`d.
    """
    def __init__(self):
        # line number -> Line, created lazily on first access
        self.lines = collections.defaultdict(Line)
        # Line objects (in OTHER files) that `.include` this file
        self.edges_out = set()


class Line(object):
    """
    A `.include` line includes a source makefile into the current file.
    """
    def __init__(self):
        # filenames this line `.include`s
        self.edges_in = set()
        # metadata names this line defines (e.g. a variable assignment)
        self.definitions = set()
        # metadata name -> bool: does the metadata reach this line?
        self.metadata = collections.defaultdict(bool)


def parse(filenames, metadata):
    """
    Look through a set of Makefiles to find "interesting" lines
    (e.g., those that include other Makefiles or define key variables).

    Args:
        filenames: iterable of makefile paths to scan.
        metadata: dict of metadata name -> compiled regex that recognises
            a definition of that metadata on a makefile line.

    Returns:
        defaultdict mapping filename -> File.  Files that are included but
        never parsed still appear (with empty `lines`).
    """
    files = collections.defaultdict(File)

    for filename in filenames:
        dirname = os.path.dirname(filename)

        with open(filename) as makefile:
            for (line_number, line) in enumerate(makefile, start=1):
                # Fetch the file/line node lazily so that uninteresting
                # lines never create spurious objects in the defaultdicts.
                def get_node(filename=filename, number=line_number):
                    return files[filename].lines[number]

                match = INCLUDE.match(line)
                if match:
                    node = get_node()

                    (included,) = match.groups()
                    included = included.replace('${.CURDIR}/', '')

                    # Paths still containing make variables cannot be
                    # resolved relative to this file's directory.
                    if not included.startswith('$'):
                        included = os.path.join(dirname, included)

                    included = os.path.normpath(included)

                    node.edges_in.add(included)
                    files[included].edges_out.add(node)

                for (name, pattern) in metadata.items():
                    if pattern.match(line):
                        node = get_node()
                        node.definitions.add(name)
                        node.metadata[name] = True

    return files


def filter_singletons(files):
    """
    Filter out singleton files (files that neither include nor are
    included by other files).
    """
    keep = set()

    for (name, f) in files.items():
        if f.edges_out:
            keep.add(name)
        elif any(line.edges_in for line in f.lines.values()):
            keep.add(name)

    return {name: f for (name, f) in files.items() if name in keep}


def filter_unconnected(files):
    """
    Keep only files connected (via include edges, in either direction)
    to a file that carries metadata.
    """
    have_metadata = set(
        name for (name, f) in files.items()
        if any(any(line.metadata.values()) for line in f.lines.values())
    )

    # edges_out holds Line objects, not filenames, so build a reverse map
    # from each Line back to the file that owns it.  (The original code
    # tested `Line in files`, which was always false, so the downstream
    # walk never kept anything.)
    owner = {}
    for (name, f) in files.items():
        for line in f.lines.values():
            owner[line] = name

    keep = set()

    def keep_ancestors(f):
        # Walk the files this one includes, transitively.
        for line in f.lines.values():
            for included in line.edges_in:
                if included not in keep and included in files:
                    keep.add(included)
                    keep_ancestors(files[included])

    def keep_descendents(f):
        # Walk the include sites of this file, transitively.
        for node in f.edges_out:
            name = owner.get(node)
            if name is None or name in keep:
                continue
            keep.add(name)
            keep_descendents(files[name])

    for name in have_metadata:
        if name in keep:
            continue

        keep.add(name)
        keep_ancestors(files[name])
        keep_descendents(files[name])

    return {name: files[name] for name in keep}


def propagate_metadata(files, metadata):
    """
    Propagate all metadata through the graph, repeating until we can
    walk through the entire graph without doing any propagation.
    """
    def propagate(source, dest):
        """Copy metadata flags from source to dest; True if anything new."""
        changed = False
        for name in metadata:
            if source.metadata[name] and not dest.metadata[name]:
                dest.metadata[name] = True
                changed = True
        return changed

    dirty = True
    while dirty:
        dirty = False

        for f in files.values():
            previous = None

            # Metadata flows from each line to the next within a file...
            for number in sorted(f.lines.keys()):
                node = f.lines[number]
                if previous is not None:
                    dirty |= propagate(previous, node)
                previous = node

            # ... and from the file's last line to every include site.
            if previous is not None:
                for recipient in f.edges_out:
                    dirty |= propagate(previous, recipient)


def attribute_list(attrs):
    """
    Convert a Python dictionary to a GraphViz dot attribute list.
    Entries whose value is None are omitted.
    """
    return '[ %s ]' % ', '.join(
        '%s="%s"' % (key, value)
        for (key, value) in attrs.items()
        if value is not None
    )


def _write_legend(outfile, metadata, colours):
    """Emit the legend cluster: one filled swatch per metadata name."""
    outfile.write('''
digraph {
  rankdir = TB;

  subgraph cluster_legend {
    label = "Legend";
    rankdir = TB;
    edge [ style = invis; ];
''')

    prev = None
    for name in sorted(metadata):
        outfile.write('    "%s" %s;\n' % (name, attribute_list({
            'style': 'filled',
            'fillcolor': colours[name],
        })))

        # Invisible edges force the legend into a vertical column.
        if prev:
            outfile.write('    "%s" -> "%s";\n' % (prev, name))
        prev = name

    outfile.write('\n  }\n')


def _write_file(outfile, filename, f, colours):
    """
    Emit one file: a plain box if it has no interesting lines (it is only
    a source of include material), otherwise a cluster with one node per
    line, chained with dotted edges in line order.
    """
    lines = sorted(f.lines.items())

    if not lines:
        outfile.write('  "%s" %s;\n' % (filename, attribute_list({
            'label': filename,
            'shape': 'rectangle',
        })))
        return

    # dot identifiers cannot contain these path characters.
    safe_name = re.sub(r'[+.$/-]', '_', filename)

    outfile.write('  subgraph cluster_%s {\n' % safe_name)
    outfile.write('    rankdir = TB;\n')
    outfile.write('    label = "%s";\n' % filename)

    previous = None
    prev_meta = False

    for (line_number, node) in lines:
        name = '%s:%d' % (filename, line_number)

        fills = [colours[meta] for (meta, val) in node.metadata.items() if val]

        shape = None
        if node.definitions:
            # A definition immediately downstream of other metadata gets
            # a distinctive shape and an orange outline.
            shape = 'tripleoctagon' if prev_meta else 'doublecircle'

        attrs = {
            'fillcolor': ':'.join(fills) if fills else 'white',
            'label': line_number,
            'shape': shape,
            'style': 'filled',
        }
        if node.definitions and prev_meta:
            # Must be applied AFTER attrs is built; the original set it
            # first and the fresh dict silently discarded it.
            attrs['color'] = 'orange'

        outfile.write('    "%s" %s;\n' % (name, attribute_list(attrs)))

        if previous:
            outfile.write('    "%s" -> "%s" %s;\n' % (
                previous, name, attribute_list({'style': 'dotted'})))

        previous = name
        prev_meta = any(node.metadata.values())

    outfile.write('  }\n')


def _write_include_edges(outfile, files):
    """
    Join the clusters up: draw an edge from the end of each included file
    to the line that includes it.  This runs as a separate pass so that
    no node is accidentally defined inside the wrong cluster.
    """
    for (filename, f) in files.items():
        for (line_number, node) in f.lines.items():
            name = '%s:%d' % (filename, line_number)

            for source_filename in node.edges_in:
                if source_filename not in files:
                    continue  # filtered out, or an unresolved $-path

                source_lines = sorted(files[source_filename].lines.keys())
                if source_lines:
                    source = '%s:%d' % (source_filename, source_lines[-1])
                else:
                    source = source_filename

                outfile.write('  "%s" -> "%s";\n' % (source, name))


def write_graph(outfile, files, metadata, colours):
    """Write the complete GraphViz dot rendering of the include graph."""
    _write_legend(outfile, metadata, colours)

    for (filename, f) in files.items():
        _write_file(outfile, filename, f, colours)

    _write_include_edges(outfile, files)

    outfile.write('}\n')


def main():
    # docopt is a third-party module used only for the command line;
    # importing it lazily keeps the graph helpers importable without it.
    import docopt
    args = docopt.docopt(__doc__)

    filenames = args['<filename>']
    print('Parsing %d files' % len(filenames))

    outname = args['--output']
    outfile = open(outname, 'w') if outname != '-' else sys.stdout

    # Set up metadata: compile one matcher per requested rule/variable.
    metadata = {}
    for item in args['--metadata'].split(','):
        if item.startswith('rule:'):
            name = item[5:]
            metadata[name] = re.compile(r'^%s:' % name)

        elif item.startswith('var:'):
            name = item[4:]
            metadata[name] = re.compile(r'^%s((=)|(\t.*=))' % name)

        else:
            sys.stderr.write("Error: unknown metadata type: '%s'\n" % item)
            sys.exit(1)

    # Assign each metadata name a pastel fill colour, cycling if needed.
    pastels = ['/pastel28/%d' % x for x in range(1, 9)]
    colours = dict(zip(metadata.keys(), itertools.cycle(pastels)))

    # Parse the makefiles and propagate any metadata that we care about.
    files = parse(filenames, metadata)
    propagate_metadata(files, metadata)

    if args['--filter-singletons']:
        files = filter_singletons(files)

    if args['--filter-unconnected']:
        files = filter_unconnected(files)

    write_graph(outfile, files, metadata, colours)

    if outfile is not sys.stdout:
        outfile.close()


if __name__ == '__main__':
    main()