core/pkgs/build-support/references-by-popularity/closure-graph.py

# IMPORTANT: Making changes?
#
# Validate your changes with python3 ./closure-graph.py --test


# Using a simple algorithm, convert the references to a path in to a
# sorted list of dependent paths based on how often they're referenced
# and how deep in the tree they live. Equally-"popular" paths are then
# sorted by name.
#
# The existing writeClosure prints the paths in a simple ascii-based
# sorting of the paths.
#
# Sorting the paths by graph improves the chances that the difference
# between two builds appear near the end of the list, instead of near
# the beginning. This makes a difference for Nix builds which export a
# closure for another program to consume, if that program implements its
# own level of binary diffing.
#
# For an example, Docker Images. If each store path is a separate layer
# then Docker Images can be very efficiently transfered between systems,
# and we get very good cache reuse between images built with the same
# version of Nixpkgs. However, since Docker only reliably supports a
# small number of layers (42) it is important to pick the individual
# layers carefully. By storing very popular store paths in the first 40
# layers, we improve the chances that the next Docker image will share
# many of those layers.*
#
# Given the dependency tree:
#
#     A - B - C - D -\
#      \   \   \      \
#       \   \   \      \
#        \   \ - E ---- F
#         \- G
#
# Nodes which have multiple references are duplicated:
#
#     A - B - C - D - F
#      \   \   \
#       \   \   \- E - F
#        \   \
#         \   \- E - F
#          \
#           \- G
#
# Each leaf node is now replaced by a counter defaulted to 1:
#
#     A - B - C - D - (F:1)
#      \   \   \
#       \   \   \- E - (F:1)
#        \   \
#         \   \- E - (F:1)
#          \
#           \- (G:1)
#
# Then each leaf counter is merged with its parent node, replacing the
# parent node with a counter of 1, and each existing counter being
# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
#
#     A - B - C - (D:1, F:2)
#      \   \   \
#       \   \   \- (E:1, F:2)
#        \   \
#         \   \- (E:1, F:2)
#          \
#           \- (G:1)
#
# Then each leaf counter is merged with its parent node again, merging
# any counters, then incrementing each:
#
#     A - B - (C:1, D:2, E:2, F:5)
#      \   \
#       \   \- (E:1, F:2)
#        \
#         \- (G:1)
#
# And again:
#
#     A - (B:1, C:2, D:3, E:4, F:8)
#      \
#       \- (G:1)
#
# And again:
#
#     (A:1, B:2, C:3, D:4, E:5, F:9, G:2)
#
# and then paths have the following "popularity":
#
#     A     1
#     B     2
#     C     3
#     D     4
#     E     5
#     F     9
#     G     2
#
# and the popularity contest would result in the paths being printed as:
#
#     F
#     E
#     D
#     C
#     B
#     G
#     A
#
# * Note: People who have used a Dockerfile before assume Docker's
# Layers are inherently ordered. However, this is not true -- Docker
# layers are content-addressable and are not explicitly layered until
# they are composed in to an Image.

import sys
import json
import unittest

from pprint import pprint
from collections import defaultdict


def debug(msg, *args, **kwargs):
    if False:
        print(
            "DEBUG: {}".format(
                msg.format(*args, **kwargs)
            ),
            file=sys.stderr
        )


# Find paths in the original dataset which are never referenced by
# any other paths
def find_roots(closures):
    roots = [];

    for closure in closures:
        path = closure['path']
        if not any_refer_to(path, closures):
            roots.append(path)

    return roots

class TestFindRoots(unittest.TestCase):
    def test_find_roots(self):
        self.assertCountEqual(
            find_roots([
                {
                    "path": "/nix/store/foo",
                    "references": [
                        "/nix/store/foo",
                        "/nix/store/bar"
                    ]
                },
                {
                    "path": "/nix/store/bar",
                    "references": [
                        "/nix/store/bar",
                        "/nix/store/tux"
                    ]
                },
                {
                    "path": "/nix/store/hello",
                    "references": [
                    ]
                }
            ]),
            ["/nix/store/foo", "/nix/store/hello"]
        )


def any_refer_to(path, closures):
    for closure in closures:
        if path != closure['path']:
            if path in closure['references']:
                return True
    return False

class TestAnyReferTo(unittest.TestCase):
    def test_has_references(self):
        self.assertTrue(
            any_refer_to(
                "/nix/store/bar",
                [
                    {
                        "path": "/nix/store/foo",
                        "references": [
                            "/nix/store/bar"
                        ]
                    },
                ]
            ),
        )
    def test_no_references(self):
        self.assertFalse(
            any_refer_to(
                "/nix/store/foo",
                [
                    {
                        "path": "/nix/store/foo",
                        "references": [
                            "/nix/store/foo",
                            "/nix/store/bar"
                        ]
                    },
                ]
            ),
        )

def all_paths(closures):
    paths = []
    for closure in closures:
        paths.append(closure['path'])
        paths.extend(closure['references'])
    paths.sort()
    return list(set(paths))


class TestAllPaths(unittest.TestCase):
    def test_returns_all_paths(self):
        self.assertCountEqual(
            all_paths([
                {
                    "path": "/nix/store/foo",
                    "references": [
                        "/nix/store/foo",
                        "/nix/store/bar"
                    ]
                },
                {
                    "path": "/nix/store/bar",
                    "references": [
                        "/nix/store/bar",
                        "/nix/store/tux"
                    ]
                },
                {
                    "path": "/nix/store/hello",
                    "references": [
                    ]
                }
            ]),
            ["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",]
        )
    def test_no_references(self):
        self.assertFalse(
            any_refer_to(
                "/nix/store/foo",
                [
                    {
                        "path": "/nix/store/foo",
                        "references": [
                            "/nix/store/foo",
                            "/nix/store/bar"
                        ]
                    },
                ]
            ),
        )

# Convert:
#
# [
#    { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
#    { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
#    { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
#    { path: /nix/store/tux, references: [ /nix/store/tux ] }
#  ]
#
# To:
#    {
#      /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
#      /nix/store/bar: [ /nix/store/baz ],
#      /nix/store/baz: [ /nix/store/tux ] },
#      /nix/store/tux: [ ]
#    }
#
# Note that it drops self-references to avoid loops.
def make_lookup(closures):
    lookup = {}

    for closure in closures:
        # paths often self-refer
        nonreferential_paths = [ref for ref in closure['references'] if ref != closure['path']]
        lookup[closure['path']] = nonreferential_paths

    return lookup

class TestMakeLookup(unittest.TestCase):
    def test_returns_lookp(self):
        self.assertDictEqual(
            make_lookup([
                {
                    "path": "/nix/store/foo",
                    "references": [
                        "/nix/store/foo",
                        "/nix/store/bar"
                    ]
                },
                {
                    "path": "/nix/store/bar",
                    "references": [
                        "/nix/store/bar",
                        "/nix/store/tux"
                    ]
                },
                {
                    "path": "/nix/store/hello",
                    "references": [
                    ]
                }
            ]),
            {
                "/nix/store/foo": [ "/nix/store/bar" ],
                "/nix/store/bar": [ "/nix/store/tux" ],
                "/nix/store/hello": [ ],
            }
        )

# Convert:
#
# /nix/store/foo with
#  {
#    /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
#    /nix/store/bar: [ /nix/store/baz ],
#    /nix/store/baz: [ /nix/store/tux ] },
#    /nix/store/tux: [ ]
#  }
#
# To:
#
# {
#   /nix/store/bar: {
#                    /nix/store/baz: {
#                                     /nix/store/tux: {}
#                    }
#   },
#   /nix/store/baz: {
#                   /nix/store/tux: {}
#   }
# }
subgraphs_cache = {}
def make_graph_segment_from_root(root, lookup):
    global subgraphs_cache
    children = {}
    for ref in lookup[root]:
        # make_graph_segment_from_root is a pure function, and will
        # always return the same result based on a given input. Thus,
        # cache computation.
        #
        # Python's assignment will use a pointer, preventing memory
        # bloat for large graphs.
        if ref not in subgraphs_cache:
            debug("Subgraph Cache miss on {}".format(ref))
            subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup)
        else:
            debug("Subgraph Cache hit on {}".format(ref))
        children[ref] = subgraphs_cache[ref]
    return children

class TestMakeGraphSegmentFromRoot(unittest.TestCase):
    def test_returns_graph(self):
        self.assertDictEqual(
            make_graph_segment_from_root("/nix/store/foo", {
                "/nix/store/foo": [ "/nix/store/bar" ],
                "/nix/store/bar": [ "/nix/store/tux" ],
                "/nix/store/tux": [ ],
                "/nix/store/hello": [ ],
            }),
            {
                "/nix/store/bar": {
                    "/nix/store/tux": {}
                }
            }
        )
    def test_returns_graph_tiny(self):
        self.assertDictEqual(
            make_graph_segment_from_root("/nix/store/tux", {
                "/nix/store/foo": [ "/nix/store/bar" ],
                "/nix/store/bar": [ "/nix/store/tux" ],
                "/nix/store/tux": [ ],
            }),
            {}
        )

# Convert a graph segment in to a popularity-counted dictionary:
#
# From:
# {
#    /nix/store/foo: {
#                      /nix/store/bar: {
#                                        /nix/store/baz: {
#                                                           /nix/store/tux: {}
#                                        }
#                      }
#                      /nix/store/baz: {
#                                         /nix/store/tux: {}
#                      }
#    }
# }
#
# to:
# [
#   /nix/store/foo: 1
#   /nix/store/bar: 2
#   /nix/store/baz: 4
#   /nix/store/tux: 6
# ]
popularity_cache = {}
def graph_popularity_contest(full_graph):
    global popularity_cache
    popularity = defaultdict(int)
    for path, subgraph in full_graph.items():
        popularity[path] += 1
        # graph_popularity_contest is a pure function, and will
        # always return the same result based on a given input. Thus,
        # cache computation.
        #
        # Python's assignment will use a pointer, preventing memory
        # bloat for large graphs.
        if path not in popularity_cache:
            debug("Popularity Cache miss on {}", path)
            popularity_cache[path] = graph_popularity_contest(subgraph)
        else:
            debug("Popularity Cache hit on {}", path)

        subcontest = popularity_cache[path]
        for subpath, subpopularity in subcontest.items():
            debug("Calculating popularity for {}", subpath)
            popularity[subpath] += subpopularity + 1

    return popularity

class TestGraphPopularityContest(unittest.TestCase):
    def test_counts_popularity(self):
        self.assertDictEqual(
            graph_popularity_contest({
                "/nix/store/foo": {
                    "/nix/store/bar": {
                        "/nix/store/baz": {
                            "/nix/store/tux": {}
                        }
                    },
                    "/nix/store/baz": {
                        "/nix/store/tux": {}
                    }
                }
            }),
            {
                   "/nix/store/foo": 1,
                   "/nix/store/bar": 2,
                   "/nix/store/baz": 4,
                   "/nix/store/tux": 6,
            }
        )

# Emit a list of packages by popularity, most first:
#
# From:
# [
#   /nix/store/foo: 1
#   /nix/store/bar: 1
#   /nix/store/baz: 2
#   /nix/store/tux: 2
# ]
#
# To:
# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]
def order_by_popularity(paths):
    paths_by_popularity = defaultdict(list)
    popularities = []
    for path, popularity in paths.items():
        popularities.append(popularity)
        paths_by_popularity[popularity].append(path)

    popularities = list(set(popularities))
    popularities.sort()

    flat_ordered = []
    for popularity in popularities:
        paths = paths_by_popularity[popularity]
        paths.sort(key=package_name)

        flat_ordered.extend(reversed(paths))
    return list(reversed(flat_ordered))


class TestOrderByPopularity(unittest.TestCase):
    def test_returns_in_order(self):
        self.assertEqual(
            order_by_popularity({
                   "/nix/store/foo": 1,
                   "/nix/store/bar": 1,
                   "/nix/store/baz": 2,
                   "/nix/store/tux": 2,
            }),
            [
                "/nix/store/baz",
                "/nix/store/tux",
                "/nix/store/bar",
                "/nix/store/foo"
            ]
        )

def package_name(path):
    parts = path.split('-')
    start = parts.pop(0)
    # don't throw away any data, so the order is always the same.
    # even in cases where only the hash at the start has changed.
    parts.append(start)
    return '-'.join(parts)

def main():
    filename = sys.argv[1]
    key = sys.argv[2]

    debug("Loading from {}", filename)
    with open(filename) as f:
        data = json.load(f)

    # Data comes in as:
    # [
    #    { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
    #    { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
    #    { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
    #    { path: /nix/store/tux, references: [ /nix/store/tux ] }
    #  ]
    #
    # and we want to get out a list of paths ordered by how universally,
    # important they are, ie: tux is referenced by every path, transitively
    # so it should be #1
    #
    # [
    #   /nix/store/tux,
    #   /nix/store/baz,
    #   /nix/store/bar,
    #   /nix/store/foo,
    # ]
    graph = data[key]

    debug("Finding roots from {}", key)
    roots = find_roots(graph);
    debug("Making lookup for {}", key)
    lookup = make_lookup(graph)

    full_graph = {}
    for root in roots:
        debug("Making full graph for {}", root)
        full_graph[root] = make_graph_segment_from_root(root, lookup)

    debug("Running contest")
    contest = graph_popularity_contest(full_graph)
    debug("Ordering by popularity")
    ordered = order_by_popularity(contest)
    debug("Checking for missing paths")
    missing = []
    for path in all_paths(graph):
        if path not in ordered:
            missing.append(path)

    ordered.extend(missing)
    print("\n".join(ordered))

if "--test" in sys.argv:
    # Don't pass --test otherwise unittest gets mad
    unittest.main(argv = [f for f in sys.argv if f != "--test" ])
else:
    main()
pkgs: init 2024-05-02 00:46:19 +00:00			`# IMPORTANT: Making changes?`
			`#`
			`# Validate your changes with python3 ./closure-graph.py --test`


			`# Using a simple algorithm, convert the references to a path in to a`
			`# sorted list of dependent paths based on how often they're referenced`
			`# and how deep in the tree they live. Equally-"popular" paths are then`
			`# sorted by name.`
			`#`
			`# The existing writeClosure prints the paths in a simple ascii-based`
			`# sorting of the paths.`
			`#`
			`# Sorting the paths by graph improves the chances that the difference`
			`# between two builds appear near the end of the list, instead of near`
			`# the beginning. This makes a difference for Nix builds which export a`
			`# closure for another program to consume, if that program implements its`
			`# own level of binary diffing.`
			`#`
			`# For an example, Docker Images. If each store path is a separate layer`
			`# then Docker Images can be very efficiently transfered between systems,`
			`# and we get very good cache reuse between images built with the same`
			`# version of Nixpkgs. However, since Docker only reliably supports a`
			`# small number of layers (42) it is important to pick the individual`
			`# layers carefully. By storing very popular store paths in the first 40`
			`# layers, we improve the chances that the next Docker image will share`
			`# many of those layers.*`
			`#`
			`# Given the dependency tree:`
			`#`
			`# A - B - C - D -\`
			`# \ \ \ \`
			`# \ \ \ \`
			`# \ \ - E ---- F`
			`# \- G`
			`#`
			`# Nodes which have multiple references are duplicated:`
			`#`
			`# A - B - C - D - F`
			`# \ \ \`
			`# \ \ \- E - F`
			`# \ \`
			`# \ \- E - F`
			`# \`
			`# \- G`
			`#`
			`# Each leaf node is now replaced by a counter defaulted to 1:`
			`#`
			`# A - B - C - D - (F:1)`
			`# \ \ \`
			`# \ \ \- E - (F:1)`
			`# \ \`
			`# \ \- E - (F:1)`
			`# \`
			`# \- (G:1)`
			`#`
			`# Then each leaf counter is merged with its parent node, replacing the`
			`# parent node with a counter of 1, and each existing counter being`
			# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
			`#`
			`# A - B - C - (D:1, F:2)`
			`# \ \ \`
			`# \ \ \- (E:1, F:2)`
			`# \ \`
			`# \ \- (E:1, F:2)`
			`# \`
			`# \- (G:1)`
			`#`
			`# Then each leaf counter is merged with its parent node again, merging`
			`# any counters, then incrementing each:`
			`#`
			`# A - B - (C:1, D:2, E:2, F:5)`
			`# \ \`
			`# \ \- (E:1, F:2)`
			`# \`
			`# \- (G:1)`
			`#`
			`# And again:`
			`#`
			`# A - (B:1, C:2, D:3, E:4, F:8)`
			`# \`
			`# \- (G:1)`
			`#`
			`# And again:`
			`#`
			`# (A:1, B:2, C:3, D:4, E:5, F:9, G:2)`
			`#`
			`# and then paths have the following "popularity":`
			`#`
			`# A 1`
			`# B 2`
			`# C 3`
			`# D 4`
			`# E 5`
			`# F 9`
			`# G 2`
			`#`
			`# and the popularity contest would result in the paths being printed as:`
			`#`
			`# F`
			`# E`
			`# D`
			`# C`
			`# B`
			`# G`
			`# A`
			`#`
			`# * Note: People who have used a Dockerfile before assume Docker's`
			`# Layers are inherently ordered. However, this is not true -- Docker`
			`# layers are content-addressable and are not explicitly layered until`
			`# they are composed in to an Image.`

			`import sys`
			`import json`
			`import unittest`

			`from pprint import pprint`
			`from collections import defaultdict`


			`def debug(msg, args, *kwargs):`
			`if False:`
			`print(`
			`"DEBUG: {}".format(`
			`msg.format(args, *kwargs)`
			`),`
			`file=sys.stderr`
			`)`


			`# Find paths in the original dataset which are never referenced by`
			`# any other paths`
			`def find_roots(closures):`
			`roots = [];`

			`for closure in closures:`
			`path = closure['path']`
			`if not any_refer_to(path, closures):`
			`roots.append(path)`

			`return roots`

			`class TestFindRoots(unittest.TestCase):`
			`def test_find_roots(self):`
			`self.assertCountEqual(`
			`find_roots([`
			`{`
			`"path": "/nix/store/foo",`
			`"references": [`
			`"/nix/store/foo",`
			`"/nix/store/bar"`
			`]`
			`},`
			`{`
			`"path": "/nix/store/bar",`
			`"references": [`
			`"/nix/store/bar",`
			`"/nix/store/tux"`
			`]`
			`},`
			`{`
			`"path": "/nix/store/hello",`
			`"references": [`
			`]`
			`}`
			`]),`
			`["/nix/store/foo", "/nix/store/hello"]`
			`)`


			`def any_refer_to(path, closures):`
			`for closure in closures:`
			`if path != closure['path']:`
			`if path in closure['references']:`
			`return True`
			`return False`

			`class TestAnyReferTo(unittest.TestCase):`
			`def test_has_references(self):`
			`self.assertTrue(`
			`any_refer_to(`
			`"/nix/store/bar",`
			`[`
			`{`
			`"path": "/nix/store/foo",`
			`"references": [`
			`"/nix/store/bar"`
			`]`
			`},`
			`]`
			`),`
			`)`
			`def test_no_references(self):`
			`self.assertFalse(`
			`any_refer_to(`
			`"/nix/store/foo",`
			`[`
			`{`
			`"path": "/nix/store/foo",`
			`"references": [`
			`"/nix/store/foo",`
			`"/nix/store/bar"`
			`]`
			`},`
			`]`
			`),`
			`)`

			`def all_paths(closures):`
			`paths = []`
			`for closure in closures:`
			`paths.append(closure['path'])`
			`paths.extend(closure['references'])`
			`paths.sort()`
			`return list(set(paths))`


			`class TestAllPaths(unittest.TestCase):`
			`def test_returns_all_paths(self):`
			`self.assertCountEqual(`
			`all_paths([`
			`{`
			`"path": "/nix/store/foo",`
			`"references": [`
			`"/nix/store/foo",`
			`"/nix/store/bar"`
			`]`
			`},`
			`{`
			`"path": "/nix/store/bar",`
			`"references": [`
			`"/nix/store/bar",`
			`"/nix/store/tux"`
			`]`
			`},`
			`{`
			`"path": "/nix/store/hello",`
			`"references": [`
			`]`
			`}`
			`]),`
			`["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",]`
			`)`
			`def test_no_references(self):`
			`self.assertFalse(`
			`any_refer_to(`
			`"/nix/store/foo",`
			`[`
			`{`
			`"path": "/nix/store/foo",`
			`"references": [`
			`"/nix/store/foo",`
			`"/nix/store/bar"`
			`]`
			`},`
			`]`
			`),`
			`)`

			`# Convert:`
			`#`
			`# [`
			`# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },`
			`# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },`
			`# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },`
			`# { path: /nix/store/tux, references: [ /nix/store/tux ] }`
			`# ]`
			`#`
			`# To:`
			`# {`
			`# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],`
			`# /nix/store/bar: [ /nix/store/baz ],`
			`# /nix/store/baz: [ /nix/store/tux ] },`
			`# /nix/store/tux: [ ]`
			`# }`
			`#`
			`# Note that it drops self-references to avoid loops.`
			`def make_lookup(closures):`
			`lookup = {}`

			`for closure in closures:`
			`# paths often self-refer`
			`nonreferential_paths = [ref for ref in closure['references'] if ref != closure['path']]`
			`lookup[closure['path']] = nonreferential_paths`

			`return lookup`

			`class TestMakeLookup(unittest.TestCase):`
			`def test_returns_lookp(self):`
			`self.assertDictEqual(`
			`make_lookup([`
			`{`
			`"path": "/nix/store/foo",`
			`"references": [`
			`"/nix/store/foo",`
			`"/nix/store/bar"`
			`]`
			`},`
			`{`
			`"path": "/nix/store/bar",`
			`"references": [`
			`"/nix/store/bar",`
			`"/nix/store/tux"`
			`]`
			`},`
			`{`
			`"path": "/nix/store/hello",`
			`"references": [`
			`]`
			`}`
			`]),`
			`{`
			`"/nix/store/foo": [ "/nix/store/bar" ],`
			`"/nix/store/bar": [ "/nix/store/tux" ],`
			`"/nix/store/hello": [ ],`
			`}`
			`)`

			`# Convert:`
			`#`
			`# /nix/store/foo with`
			`# {`
			`# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],`
			`# /nix/store/bar: [ /nix/store/baz ],`
			`# /nix/store/baz: [ /nix/store/tux ] },`
			`# /nix/store/tux: [ ]`
			`# }`
			`#`
			`# To:`
			`#`
			`# {`
			`# /nix/store/bar: {`
			`# /nix/store/baz: {`
			`# /nix/store/tux: {}`
			`# }`
			`# },`
			`# /nix/store/baz: {`
			`# /nix/store/tux: {}`
			`# }`
			`# }`
			`subgraphs_cache = {}`
			`def make_graph_segment_from_root(root, lookup):`
			`global subgraphs_cache`
			`children = {}`
			`for ref in lookup[root]:`
			`# make_graph_segment_from_root is a pure function, and will`
			`# always return the same result based on a given input. Thus,`
			`# cache computation.`
			`#`
			`# Python's assignment will use a pointer, preventing memory`
			`# bloat for large graphs.`
			`if ref not in subgraphs_cache:`
			`debug("Subgraph Cache miss on {}".format(ref))`
			`subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup)`
			`else:`
			`debug("Subgraph Cache hit on {}".format(ref))`
			`children[ref] = subgraphs_cache[ref]`
			`return children`

			`class TestMakeGraphSegmentFromRoot(unittest.TestCase):`
			`def test_returns_graph(self):`
			`self.assertDictEqual(`
			`make_graph_segment_from_root("/nix/store/foo", {`
			`"/nix/store/foo": [ "/nix/store/bar" ],`
			`"/nix/store/bar": [ "/nix/store/tux" ],`
			`"/nix/store/tux": [ ],`
			`"/nix/store/hello": [ ],`
			`}),`
			`{`
			`"/nix/store/bar": {`
			`"/nix/store/tux": {}`
			`}`
			`}`
			`)`
			`def test_returns_graph_tiny(self):`
			`self.assertDictEqual(`
			`make_graph_segment_from_root("/nix/store/tux", {`
			`"/nix/store/foo": [ "/nix/store/bar" ],`
			`"/nix/store/bar": [ "/nix/store/tux" ],`
			`"/nix/store/tux": [ ],`
			`}),`
			`{}`
			`)`

			`# Convert a graph segment in to a popularity-counted dictionary:`
			`#`
			`# From:`
			`# {`
			`# /nix/store/foo: {`
			`# /nix/store/bar: {`
			`# /nix/store/baz: {`
			`# /nix/store/tux: {}`
			`# }`
			`# }`
			`# /nix/store/baz: {`
			`# /nix/store/tux: {}`
			`# }`
			`# }`
			`# }`
			`#`
			`# to:`
			`# [`
			`# /nix/store/foo: 1`
			`# /nix/store/bar: 2`
			`# /nix/store/baz: 4`
			`# /nix/store/tux: 6`
			`# ]`
			`popularity_cache = {}`
			`def graph_popularity_contest(full_graph):`
			`global popularity_cache`
			`popularity = defaultdict(int)`
			`for path, subgraph in full_graph.items():`
			`popularity[path] += 1`
			`# graph_popularity_contest is a pure function, and will`
			`# always return the same result based on a given input. Thus,`
			`# cache computation.`
			`#`
			`# Python's assignment will use a pointer, preventing memory`
			`# bloat for large graphs.`
			`if path not in popularity_cache:`
			`debug("Popularity Cache miss on {}", path)`
			`popularity_cache[path] = graph_popularity_contest(subgraph)`
			`else:`
			`debug("Popularity Cache hit on {}", path)`

			`subcontest = popularity_cache[path]`
			`for subpath, subpopularity in subcontest.items():`
			`debug("Calculating popularity for {}", subpath)`
			`popularity[subpath] += subpopularity + 1`

			`return popularity`

			`class TestGraphPopularityContest(unittest.TestCase):`
			`def test_counts_popularity(self):`
			`self.assertDictEqual(`
			`graph_popularity_contest({`
			`"/nix/store/foo": {`
			`"/nix/store/bar": {`
			`"/nix/store/baz": {`
			`"/nix/store/tux": {}`
			`}`
			`},`
			`"/nix/store/baz": {`
			`"/nix/store/tux": {}`
			`}`
			`}`
			`}),`
			`{`
			`"/nix/store/foo": 1,`
			`"/nix/store/bar": 2,`
			`"/nix/store/baz": 4,`
			`"/nix/store/tux": 6,`
			`}`
			`)`

			`# Emit a list of packages by popularity, most first:`
			`#`
			`# From:`
			`# [`
			`# /nix/store/foo: 1`
			`# /nix/store/bar: 1`
			`# /nix/store/baz: 2`
			`# /nix/store/tux: 2`
			`# ]`
			`#`
			`# To:`
			`# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]`
			`def order_by_popularity(paths):`
			`paths_by_popularity = defaultdict(list)`
			`popularities = []`
			`for path, popularity in paths.items():`
			`popularities.append(popularity)`
			`paths_by_popularity[popularity].append(path)`

			`popularities = list(set(popularities))`
			`popularities.sort()`

			`flat_ordered = []`
			`for popularity in popularities:`
			`paths = paths_by_popularity[popularity]`
			`paths.sort(key=package_name)`

			`flat_ordered.extend(reversed(paths))`
			`return list(reversed(flat_ordered))`


			`class TestOrderByPopularity(unittest.TestCase):`
			`def test_returns_in_order(self):`
			`self.assertEqual(`
			`order_by_popularity({`
			`"/nix/store/foo": 1,`
			`"/nix/store/bar": 1,`
			`"/nix/store/baz": 2,`
			`"/nix/store/tux": 2,`
			`}),`
			`[`
			`"/nix/store/baz",`
			`"/nix/store/tux",`
			`"/nix/store/bar",`
			`"/nix/store/foo"`
			`]`
			`)`

			`def package_name(path):`
			`parts = path.split('-')`
			`start = parts.pop(0)`
			`# don't throw away any data, so the order is always the same.`
			`# even in cases where only the hash at the start has changed.`
			`parts.append(start)`
			`return '-'.join(parts)`

			`def main():`
			`filename = sys.argv[1]`
			`key = sys.argv[2]`

			`debug("Loading from {}", filename)`
			`with open(filename) as f:`
			`data = json.load(f)`

			`# Data comes in as:`
			`# [`
			`# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },`
			`# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },`
			`# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },`
			`# { path: /nix/store/tux, references: [ /nix/store/tux ] }`
			`# ]`
			`#`
			`# and we want to get out a list of paths ordered by how universally,`
			`# important they are, ie: tux is referenced by every path, transitively`
			`# so it should be #1`
			`#`
			`# [`
			`# /nix/store/tux,`
			`# /nix/store/baz,`
			`# /nix/store/bar,`
			`# /nix/store/foo,`
			`# ]`
			`graph = data[key]`

			`debug("Finding roots from {}", key)`
			`roots = find_roots(graph);`
			`debug("Making lookup for {}", key)`
			`lookup = make_lookup(graph)`

			`full_graph = {}`
			`for root in roots:`
			`debug("Making full graph for {}", root)`
			`full_graph[root] = make_graph_segment_from_root(root, lookup)`

			`debug("Running contest")`
			`contest = graph_popularity_contest(full_graph)`
			`debug("Ordering by popularity")`
			`ordered = order_by_popularity(contest)`
			`debug("Checking for missing paths")`
			`missing = []`
			`for path in all_paths(graph):`
			`if path not in ordered:`
			`missing.append(path)`

			`ordered.extend(missing)`
			`print("\n".join(ordered))`

			`if "--test" in sys.argv:`
			`# Don't pass --test otherwise unittest gets mad`
			`unittest.main(argv = [f for f in sys.argv if f != "--test" ])`
			`else:`
			`main()`