In [1]:
import pandas as pd
import math
# Read the flat file, then replace missing values in the "parent" column with
# an empty string.
df = pd.read_csv("random_data_flat_file.csv")
df = df.assign(parent=df["parent"].fillna(""))

In [2]:
# Columns of the flat file that are copied onto every node of the tree.
fields_to_retain = [
    "colour",
    "email_address",
    "full_name",
    "id",
    "job_title",
    "parent",
    "phone_numbers",
    "statistic_1",
    "statistic_2",
    "summary",
    "web_url",
    "mugshot_url_template",
]

In [3]:
# One dict per row of the frame, keyed by column name.
data = df.to_dict("records")

In [4]:
#Start from a flat dict holding one entry per user, keyed by the user's id.
#It isn't really a tree yet, but it will become one: a later step walks the
#dict, 'tidying' leaf nodes into their parents, and repeats.

#No doubt there's a much more efficient way of doing this.

tree = {}
for row in data:
    #"parent" and "children" go in first, followed by the retained fields
    node = {"parent": row["parent"], "children": None}
    node.update((field, row[field]) for field in fields_to_retain)
    tree[row["id"]] = node

In [5]:
#Every distinct value found in the "parent" field of the records
parents = {row["parent"] for row in data}

In [6]:
#Roll up any item that isn't a parent (i.e. is a leaf) into its parent, pass by
#pass, until a pass moves nothing.
#
#Each pass nests the current leaves one level deeper, so the loop runs to a
#fixed point instead of a fixed number of passes; a hierarchy of any depth is
#fully nested.  When the loop ends, new_tree holds only the nodes that could
#not be rolled up (nodes without a parent, or nodes caught in a parent cycle).

def _has_parent(node):
    """Return True if ``node["parent"]`` refers to another node.

    ``None``, the empty string and NaN all count as "no parent".  Any other
    value, including an id of 0, is treated as a real parent id.
    """
    parent = node["parent"]
    if parent is None or parent == "":
        return False
    return not (isinstance(parent, float) and math.isnan(parent))


#Copy every node dict so that nesting children never mutates the dicts held by
#`tree`; otherwise re-running this cell would append the same children twice.
old_tree = {node_id: dict(node) for node_id, node in tree.items()}

while True:
    #Ids still referenced as a parent by a top-level node.  Recomputed from the
    #working tree on every pass, so the cell does not rely on whatever value
    #`parents` was left with by a previous run.
    parents = set(node["parent"] for node in old_tree.values())

    #Shallow copy: entries are removed from new_tree while old_tree is being
    #iterated, but both still share the same node dicts.
    new_tree = old_tree.copy()

    for key, value in old_tree.items():  #The key is the id and the value is all of the data
        #Skip nodes that are still somebody's parent (their own children must
        #be nested first) and nodes that have no parent to roll into.
        if key in parents or not _has_parent(value):
            continue

        parent_node = new_tree[value["parent"]]  #KeyError if the parent id is not in the tree
        if not parent_node["children"]:
            parent_node["children"] = []

        parent_node["children"].append(value)
        parent_node["children"].sort(key=lambda x: x["full_name"])
        del new_tree[key]

    if len(new_tree) == len(old_tree):
        break  #Nothing was rolled up in this pass, so we are done

    old_tree = new_tree

In [7]:
#The select/search box in the app also needs data: one {"id", "text"} entry
#per row, where the text is "<full_name>, <job_title>", ordered by that text.

df["text"] = df["full_name"] + ", " + df["job_title"]

df2 = df.loc[:, ["id", "text"]]
select_box = sorted(df2.to_dict(orient="records"), key=lambda entry: entry["text"])

In [8]:
#Take the first top-level key of new_tree as the root of the exported tree.
#list(new_tree)[0] picks the same element as new_tree.keys()[0] but also works
#where dict.keys() returns a view that cannot be indexed.
#NOTE: if more than one top-level node is left in new_tree, only this one is
#exported - set root_id explicitly in that case.
root_id = list(new_tree)[0]  #Insert id of CEO here
final_data = {"tree": new_tree[root_id], "select_box": select_box}

In [9]:
import json

#Serialise final_data to a JSON string and write it to the output file.
with open('../website/data/orgchart_data.json', 'w') as outfile:
    outfile.write(json.dumps(final_data))