In [9]:
import os
import yaml
import fnmatch
from conda_build.metadata import MetaData
from pprint import pprint
import binstar_client
import matplotlib.pyplot as plt
import copy
%matplotlib inline
import networkx as nx
In [10]:
# Read the anaconda.org API token from disk. The context manager closes the
# file handle as soon as the token is read instead of leaving it open.
# NOTE(review): this is an absolute path on one developer's machine.
with open('/home/edill/dev/dotfiles/tokens/lightsource2-testing.token', 'r') as token_file:
    token = token_file.read().strip()
In [11]:
# Client used below to list the files on a channel; authenticated with `token`.
cli = binstar_client.Binstar(
    token=token,
    domain="https://api.anaconda.org",
)
In [12]:
# Set of the second '/'-separated component of every file's 'full_name' on
# the conda-forge 'main' channel (presumably the package name - TODO confirm).
packages_on_conda_forge = {
    entry['full_name'].split('/')[1]
    for entry in cli.show_channel('main', 'conda-forge')['files']
}
In [14]:
def sanitize_names(list_of_names):
    """Reduce requirement specs to bare package names.

    Each entry is expected to be a string such as ``'numpy >=1.8'``; only the
    first whitespace-separated token is kept. No filtering against
    conda-forge is applied here.

    Parameters
    ----------
    list_of_names : iterable of str or None
        Requirement specs. ``None`` or an empty iterable yields ``[]``.

    Returns
    -------
    list of str
        One name per non-blank spec, in input order.
    """
    if not list_of_names:
        # An absent/empty requirements section should behave like "no deps".
        return []
    names = []
    for spec in list_of_names:
        # split() with no argument tolerates tabs, repeated spaces and
        # leading whitespace, which split(' ') would turn into '' entries.
        tokens = spec.split()
        if tokens:
            names.append(tokens[0])
    return names
In [127]:
# Root folders that are walked below in search of recipe directories
# (directories containing a meta.yaml): the tag, dev and config recipe sets
# of lightsource2-recipes, plus the skbeam recipes.
# NOTE(review): these are absolute paths on one developer's machine; adjust
# them before re-running elsewhere.
folders = ['/home/edill/dev/conda/lightsource2-recipes/recipes-tag/',
'/home/edill/dev/conda/lightsource2-recipes/recipes-dev/',
'/home/edill/dev/conda/lightsource2-recipes/recipes-config/',
'/home/edill/dev/conda/skbeam-recipes/']
In [142]:
# Requirement names per recipe, keyed by package name; one dict per section
# of the recipe (requirements/run, requirements/build, test/requires).
run = {}
build = {}
test = {}
for path_to_recipes in folders:
    for dirpath, dirnames, filenames in os.walk(path_to_recipes):
        if 'meta.yaml' not in filenames:
            # not a recipe directory, keep walking
            continue
        print("evaluating", dirpath)
        meta = MetaData(dirpath)
        name = meta.meta['package']['name']
        # `.get(key, default)` only covers a *missing* key. A section that is
        # present but empty may come back as None (presumably, depending on
        # the conda-build version), so fall back explicitly with `or`.
        requirements = meta.meta.get('requirements') or {}
        test_section = meta.meta.get('test') or {}
        build[name] = sanitize_names(requirements.get('build') or [])
        run[name] = sanitize_names(requirements.get('run') or [])
        test[name] = sanitize_names(test_section.get('requires') or [])
In [143]:
# For every package seen in any section, merge its test, run and build
# requirement names into a single set.
union = {}
for pkg_name in set(list(test) + list(build) + list(run)):
    merged = test.get(pkg_name, []) + run.get(pkg_name, []) + build.get(pkg_name, [])
    union[pkg_name] = set(merged)
In [144]:
# Keep only the requirements that are themselves keys of `union`, i.e.
# packages for which a recipe was found; report everything that is dropped.
cleaned_union = {}
for name, items in union.items():
    print("Checking", name)
    kept = set()
    for item in items:
        if item in union:
            kept.add(item)
        else:
            print('\tdropping', item)
    print('\n')
    cleaned_union[name] = kept
In [145]:
# Styling keyword arguments (presumably for networkx drawing calls; not used
# in the cells visible here).
kw = {
    'node_size': 5,
    'node_color': 'w',
    'edge_color': 'b',
    'alpha': 0.25,
}
def make_network(packages_dict):
    """Build a directed dependency graph from a ``{package: deps}`` mapping.

    Parameters
    ----------
    packages_dict : dict
        Maps a package name to an iterable of the names it requires.

    Returns
    -------
    networkx.DiGraph
        One node per package and per requirement, with an edge pointing from
        each package to each of its requirements.
    """
    G = nx.DiGraph()
    for package, deps in packages_dict.items():
        # Register the package itself: a package with no requirements has no
        # edges and would otherwise be missing from the graph entirely.
        G.add_node(package)
        for dep in deps:
            G.add_edge(package, dep)
    return G
In [146]:
# One graph per requirement section of interest, plus one over the merged set.
run_G, build_G, all_G = [
    make_network(section) for section in (run, build, union)
]
In [131]:
import graphviz as gv
# Node attributes passed to graph.node() for each class of package.
need_to_build_kwargs = dict(color='red', shape='triangle')
conda_forge_kwargs = dict(color='blue', shape='diamond')
anaconda_kwargs = dict(color='black', shape='pentagon')
# dev_kwargs = dict(color='cyan3', shape='hexagon')

# Packages left out of the rendered graph.
packages_to_ignore = [
    'numpy',
    'python',
    'setuptools',
    'six',
]
def make_gv_network(packages_dict):
    """Build a graphviz ``Digraph`` of packages and their requirements.

    Nodes are styled by category: names in ``packages_on_conda_forge`` get
    ``conda_forge_kwargs``, names that are not keys of the module-level
    ``union`` get ``anaconda_kwargs``, and everything else gets
    ``need_to_build_kwargs``. Names listed in ``packages_to_ignore`` are left
    out completely, both as nodes and as either end of an edge.

    Parameters
    ----------
    packages_dict : dict
        Maps a package name to an iterable of the names it requires.

    Returns
    -------
    graphviz.Digraph
        Graph created with ``format='svg'``.
    """
    graph = gv.Digraph(format='svg')

    # Collect every name that appears as a package or as a requirement.
    all_packages = set()
    for package, deps in packages_dict.items():
        all_packages.add(package)
        # update() grows the set in place; rebinding the result of union()
        # copied the whole set on every iteration.
        all_packages.update(deps)

    # Add the nodes in sorted order so the generated source is reproducible
    # from run to run (set iteration order is not).
    for pkg in sorted(all_packages):
        if pkg in packages_to_ignore:
            continue
        if pkg in packages_on_conda_forge:
            style = conda_forge_kwargs
        elif pkg not in union:
            style = anaconda_kwargs
        else:
            style = need_to_build_kwargs
        graph.node(pkg, **style)

    # Add the edges. An ignored name must be skipped on the source side too,
    # otherwise the edge would re-introduce it as an unstyled node.
    for package, deps in packages_dict.items():
        if package in packages_to_ignore:
            continue
        for dep in deps:
            if dep in packages_to_ignore:
                continue
            graph.edge(package, dep)
    return graph
# Build the graphviz graph over the merged requirements and render it under
# the base name 'all_dependencies'. The render() call is deliberately the
# last bare expression so its return value is shown as the cell output.
gvG = make_gv_network(union)
gvG.render('all_dependencies')
Out[131]:
In [124]:
# Presumably opens the rendered graph in an external viewer - this needs a
# desktop session, so skip this cell when running headless (TODO confirm).
gvG.view()
Out[124]:
In [154]:
print("This is an attempt to characterize our build stack in terms of serial ordering")
print("As in: start from the top and work your way down to hit all the dependencies\n")
counter = 0
# Edges point package -> requirement, so the reversed topological order
# lists requirements before the packages that need them.
# topological_sort() returns a list in networkx 1.x but a generator in 2.x,
# and reversed() rejects generators; materialize it so both work.
for item in reversed(list(nx.topological_sort(all_G))):
    if item in packages_on_conda_forge:
        print('##: {} # Package already on conda-forge'.format(item))
    elif item not in union:
        print('##: {} # Package not on conda-forge and is not in our recipes'.format(item))
    else:
        # Only packages we have to build ourselves get a sequence number.
        counter += 1
        print('{:>2}: {}'.format(counter, item))
In [151]:
def print_and_trim(graph):
    """Print the nodes of ``graph`` with out-degree 0 and remove them.

    Names that are not in ``packages_on_conda_forge`` and are keys of
    ``cleaned_union`` are printed in upper case; all other names are printed
    unchanged. The list is sorted case-insensitively before printing.

    Parameters
    ----------
    graph : networkx.DiGraph
        Graph to trim. It is modified in place.

    Returns
    -------
    None
    """
    # Operate on the argument; the previous body ignored it and reached for
    # a module-level `G` instead, which only worked by coincidence.
    nodes = [node for node in graph if graph.out_degree(node) == 0]
    for node in nodes:
        graph.remove_node(node)

    labels = [
        node.upper()
        if node not in packages_on_conda_forge and node in cleaned_union
        else node
        for node in nodes
    ]
    pprint(sorted(labels, key=lambda label: label.lower()))
# Trim a copy so all_G stays intact for the other cells.
# The graph is deliberately NOT reversed: edges point package -> requirement,
# so nodes with out-degree 0 are exactly the ones with nothing left to wait
# for. A bare `G.reverse()` would not change G anyway, because reverse()
# returns a new graph by default.
G = all_G.copy()
print("Packages in all CAPS are packages that still need to be built")
print("This is an attempt to characterize our build stack in terms "
      "of which packages can be built in parallel")
print("Intra-group can be built in parallel")
print("Each group should be built serially")
count = 0
while G.nodes():
    print('\nGROUP', count+1)
    remaining = len(G)
    print_and_trim(G)
    count += 1
    if len(G) == remaining:
        # Nothing could be trimmed: every remaining node still has an
        # outgoing edge, so the loop would otherwise never finish.
        print("Stopping: no remaining package is free of dependencies "
              "(dependency cycle?)")
        break