Read in inputs.yml
and produce a GraphViz representation of the tables
In [1]:
import os
import yaml
import cea.scripts
import cea.interfaces.dashboard.inputs
from jinja2 import Template
import cea.inputlocator
import cea.config
config = cea.config.Configuration()
locator = cea.inputlocator.InputLocator(scenario=config.scenario)
schemas = cea.scripts.schemas()
OUTPUT_PATH = os.path.join(os.path.dirname(cea.config.__file__), "..", "docs", "tables_diagram.gv")
In [2]:
inputs_yml = os.path.abspath(os.path.join(os.path.dirname(cea.interfaces.dashboard.inputs.__file__), "inputs.yml"))
with open(inputs_yml, 'r') as fp:
inputs = yaml.load(fp)
inputs
Out[2]:
In [3]:
table_template = """
{{locator}} [shape=none, margin=0, label=<
<table border="0" cellborder="1" cellspacing="0" cellpadding="4" >
<tr><td bgcolor="lightblue">{{table}}</td></tr>
{% for field in fields %}<tr><td port="{{field.name}}" align="left">{{field.type}}: {{field.name}}</td></tr>
{% endfor %}
</table>>];
"""
In [4]:
table_defs = []
for table in inputs.keys():
lm = inputs[table]["location"]
fields = inputs[table]["fields"]
table_defs.append(Template(table_template).render(locator=lm, table=table, fields=fields))
In [5]:
print "\n".join(table_defs)
In [6]:
databases = set() # (lm, sheet)
connections = set() # con
for ltable in inputs:
lm = inputs[ltable]["location"]
for field in inputs[ltable]["fields"]:
if field["type"] == "choice":
lfield = field["name"]
rtable = field["choice_properties"]["lookup"]["path"]
sheet = field["choice_properties"]["lookup"]["sheet"]
rfield = field["choice_properties"]["lookup"]["column"]
databases.add((rtable, sheet))
connections.add((lm, lfield, rtable, sheet, rfield))
databases
Out[6]:
In [7]:
connections
Out[7]:
In [8]:
db_template = """
{{locator}}_{{sheet}} [shape=none, margin=0, label=<
<table border="0" cellborder="1" cellspacing="0" cellpadding="4" >
<tr><td bgcolor="lightgrey">{{table}}</td></tr>
{% for field in fields %}<tr><td port="{{field.name}}" align="left">{{field.type}}: {{field.name}}</td></tr>
{% endfor %}
</table>>];
"""
db_defs = []
for lm, sheet in databases:
xlsx = os.path.basename(getattr(locator, lm)())
table = "{file}:{sheet}".format(file=xlsx, sheet=sheet)
fields = []
for fn in schemas[lm]["schema"][sheet].keys():
name = fn.replace("&", "")
fields.append({"name": name, "type": schemas[lm]["schema"][sheet][fn]["types_found"][0]})
db_defs.append(Template(db_template).render(locator=lm, sheet=sheet, table=table, fields=fields))
print db_defs[0]
In [9]:
con_defs = []
for ltable, lfield, rtable, sheet, rfield in connections:
con_defs.append('{ltable}:"{lfield}" -> {rtable}_{sheet}:{rfield};'.format(ltable=ltable, lfield=lfield, rtable=rtable, sheet=sheet, rfield=rfield))
con_defs
Out[9]:
In [10]:
diagram_template = """
/* this diagram was generated by notebooks/2020.02.25-create-tables-entity-relationship-diagram.ipynb
you can preview it in VSCode with the "Graphviz Preview" extension.
*/
digraph tables_diagram {
graph [pad="0.5", nodesep="0.5", ranksep="2"];
node [shape=plain];
rankdir=LR;
{% for table in tables %}
{{table}}
{% endfor %}
{% for db in dbs %}
{{db}}
{% endfor %}
{% for con in cons %}
{{con}}
{% endfor %}
}
"""
with open(OUTPUT_PATH, 'w') as fp:
fp.write(Template(diagram_template).render(tables=table_defs, dbs=db_defs, cons=con_defs))