In [1]:
import pandas as pd
import math
# Load the flat file of people; the later cells read the "parent", "id",
# "full_name" and "job_title" columns from it.
df = pd.read_csv("random_data_flat_file.csv")
# Missing parents come back from read_csv as NaN; store "" instead so the
# roll-up cell can detect a parentless row with a plain == "" comparison.
df["parent"] = df["parent"].fillna("")
In [2]:
# Columns copied from each input record onto its node in the tree.
fields_to_retain = [
    "colour",
    "email_address",
    "full_name",
    "id",
    "job_title",
    "parent",
    "phone_numbers",
    "statistic_1",
    "statistic_2",
    "summary",
    "web_url",
    "mugshot_url_template",
]
In [3]:
# One dict per row of df (column name -> value); the tree is built from these.
data = df.to_dict(orient="records")
In [4]:
#Start from a flat dict with one entry per user, keyed by the user's id.
#It isn't a tree yet: a later cell repeatedly 'tidies' leaf nodes into their
#parents until it becomes one.
#No doubt there's a much more efficient way of doing this.
tree = {}
for record in data:
    #"parent" and "children" go in first so every node has them, whatever
    #fields_to_retain contains
    node = {"parent": record["parent"], "children": None}
    node.update((field, record[field]) for field in fields_to_retain)
    tree[record["id"]] = node
In [5]:
#Every value that appears in the "parent" column, i.e. the ids of all nodes
#that have at least one child (plus the "no parent" marker of any root)
parents = {record["parent"] for record in data}
In [6]:
#Roll up any item that isn't a parent (i.e. a leaf) into the "children" list of
#the node above it, and iterate. Each pass folds one level of leaves; the loop
#stops as soon as a pass folds nothing, so the depth of the hierarchy is not
#limited by a fixed number of passes.
def _is_root_marker(parent):
    """Return True when `parent` means "no parent": NaN, "" or any other falsy value."""
    #isinstance (rather than type(...) == float) also catches numpy.float64 NaN
    if isinstance(parent, float) and math.isnan(parent):
        return True
    return not parent


#Work on copies of the node dicts so `tree` itself stays flat; together with
#recomputing `parents` here this makes the cell safe to re-run (mutating the
#shared dicts used to append every child a second time on a re-run).
old_tree = {key: dict(node) for key, node in tree.items()}
parents = set(node["parent"] for node in old_tree.values())
while True:
    new_tree = old_tree.copy()
    touched = set()  #ids of the parents that received children in this pass
    #The key is the id and the value is all of the data.
    #.items() works on both Python 2 and Python 3 (.iteritems() is Python 2 only)
    for key, value in old_tree.items():
        parent_id = value["parent"]
        if _is_root_marker(parent_id):
            continue  #No parent, nothing to roll this node into
        if key in parents:
            continue  #Still has children of its own, so not a leaf yet
        #A parent id that has no entry of its own raises KeyError here
        parent_node = new_tree[parent_id]
        if not parent_node["children"]:
            parent_node["children"] = []
        parent_node["children"].append(value)
        touched.add(parent_id)
        del new_tree[key]
    #Sort once per parent per pass; the sort is stable, so the result is the
    #same as re-sorting after every single append
    for parent_id in touched:
        new_tree[parent_id]["children"].sort(key=lambda x: x["full_name"])
    old_tree = new_tree.copy()
    parents = set(node["parent"] for node in old_tree.values())
    if not touched:
        break  #Nothing was rolled up: only roots (or members of a cycle) remain
In [7]:
#Also need the data for the select/search box in the app:
#one {"id", "text"} entry per person, ordered by the displayed text
df["text"] = df["full_name"] + ", " + df["job_title"]
df2 = df[["id", "text"]]
select_box = sorted(df2.to_dict(orient="records"), key=lambda entry: entry["text"])
In [8]:
#Export the first node left in new_tree as the root of the chart. To pin a
#specific root instead, use new_tree[<id of CEO>] here.
#NOTE(review): if more than one entry is left in new_tree, only the first is exported.
#list(...)[0] works on both Python 2 and Python 3; new_tree.keys()[0] fails on
#Python 3 because dict views cannot be indexed.
root_node = list(new_tree.values())[0]
final_data = {"tree": root_node, "select_box": select_box}
In [9]:
import json
#Serialise the tree and the select-box entries into the website's data folder
output_path = '../website/data/orgchart_data.json'
with open(output_path, 'w') as json_file:
    json.dump(final_data, json_file)