In [9]:
#Load in the data saved from the API
import json
def _read_json(path):
    """Parse the JSON document stored at ``path`` and return the result."""
    with open(path) as handle:
        return json.load(handle)


r_data = _read_json('allrelationships.json')
u_data = _read_json('allusers.json')

# Activity records are re-keyed by their "id" so they can be looked up per user;
# if two records share an id, the later one wins.
a_data = {record["id"]: record for record in _read_json('allactivity.json')}

In [10]:
#Flatten the data structure of the users data
#
#Each user gains four top-level keys: "phone_numbers" (comma separated),
#"email_address" (first listed address) and the two message counters taken
#from the activity data.
not_found = 0  #number of users without a complete activity record
for u in u_data:
    contact = u["contact"]
    u["phone_numbers"] = ", ".join(entry["number"] for entry in contact["phone_numbers"])

    #A user with no e-mail address gets None rather than raising IndexError
    emails = contact["email_addresses"]
    u["email_address"] = emails[0]["address"] if emails else None

    #Activity records are keyed by the id as a string, hence str()
    try:
        activity = a_data[str(u["id"])]
        recent_messages = activity["messages_in_last_180_days"]
        all_messages = activity["all_messages"]
    except KeyError:
        #Only a missing record/field counts as "not found"; any other error
        #is a real problem and is allowed to surface.
        not_found += 1
        recent_messages = all_messages = None

    #Always set both keys so later steps that copy these fields can rely on
    #them being present (None means "no activity data").
    u["messages_in_last_180_days"] = recent_messages
    u["all_messages"] = all_messages

In [11]:
#Attach the user record to the relationship record found at the same position
#NOTE(review): this pairs r_data and u_data purely by index, so it assumes both
#lists are in the same order - confirm against the API export.
for position, relationship in enumerate(r_data):
    relationship.update(individual=u_data[position])

In [12]:
#User fields that are copied onto every node of the org chart
fields_to_retain = [
    "id",
    "full_name",
    "job_title",
    "summary",
    "web_url",
    "interests",
    "mugshot_url_template",
    "phone_numbers",
    "email_address",
    "messages_in_last_180_days",
    "all_messages",
]

In [13]:
#Tidy up the data from the relationship API to keep only the bits we need
def mapper(x):
    """Reduce one relationship record to the flat dict used for an org chart node.

    x is a relationship record holding the user record under "individual" and,
    when a line manager is recorded, a list under "superiors".

    Returns a dict with:
      * "parent": the id of the first superior, or None when none is recorded;
      * one entry per name in fields_to_retain, copied from the user record.
        "full_name" is title-cased and the "{width}"/"{height}" placeholders of
        "mugshot_url_template" are replaced by "200". A field that is missing
        from the user record (or is None) is returned as None.
    """
    #Only the failures that mean "no superior recorded" are treated as such:
    #missing key, empty list, or a null value instead of a list.
    try:
        superior = x["superiors"][0]["id"]
    except (KeyError, IndexError, TypeError):
        superior = None

    individual = x["individual"]
    return_dict = {"parent": superior}

    for field in fields_to_retain:
        #.get(): some fields (e.g. the message counters) may never have been
        #set on the user record, which must not abort the whole mapping.
        value = individual.get(field)

        if value is not None:
            if field == "full_name":
                value = value.title()
            elif field == "mugshot_url_template":
                value = value.replace("{width}", "200").replace("{height}", "200")

        return_dict[field] = value

    return return_dict
        
#r_data2 is iterated more than once further down, so it has to be a real list:
#on Python 3 a bare map() is a one-shot iterator and the second pass would be empty.
r_data2 = list(map(mapper, r_data))

In [14]:
#We're going to start with a dict which has a key for each user.  This isn't really a tree yet
#but it will become one.

#We then go through the dict one item at a time, 'tidying' leaf nodes into their parents, and iterate.

#No doubt there's a much more efficient way of doing this.

tree = {}
for person in r_data2:
    #Every node starts with no children; they get attached during the roll-up
    node = {"parent": person["parent"], "children": None}
    node.update((field, person[field]) for field in fields_to_retain)
    tree[person["id"]] = node

#Ids that at least one person names as their parent (includes None for people without one)
parents = {person["parent"] for person in r_data2}

In [15]:
#Roll up any item that isn't a parent and iterate
#
#Each pass moves every current leaf (a top-level entry that nobody names as
#parent) into the "children" list of its parent and removes it from the top
#level. Passes repeat until one changes nothing, so the depth of the hierarchy
#is not limited by a fixed number of iterations.
#
#Result: new_tree maps the id of each remaining root to its nested node.
old_tree = tree.copy()  #shallow copy: the node dicts themselves are shared
new_tree = old_tree

while True:
    new_tree = old_tree.copy()
    touched_parents = set()  #ids that received at least one child in this pass

    #.items() (not .iteritems()) so the cell runs on both Python 2 and Python 3
    for key, value in old_tree.items():
        #the key is the id
        parent_id = value["parent"]

        #Only leaves that have a parent are rolled up
        if key in parents or not parent_id:
            continue

        #The parent id does not belong to any known user: leave this node
        #at the top level as the root of its own tree instead of failing.
        if parent_id not in new_tree:
            continue

        parent_node = new_tree[parent_id]
        if not parent_node["children"]:
            parent_node["children"] = []

        parent_node["children"].append(value)
        touched_parents.add(parent_id)
        del new_tree[key]

    #Sort each changed child list once per pass; the sort is stable, so the
    #order is the same as re-sorting after every single append.
    for parent_id in touched_parents:
        new_tree[parent_id]["children"].sort(key=lambda x: x["full_name"])

    old_tree = new_tree.copy()

    previous_parents = parents
    parents = set(node["parent"] for node in old_tree.values())

    #Fixed point: nothing moved and the set of parents is unchanged, so any
    #further pass would be a no-op.
    if not touched_parents and parents == previous_parents:
        break

#print json.dumps(new_tree,indent=4, sort_keys=True)

In [16]:
#The previous step will produce multiple trees if there are some staff who haven't put in their line manager

#Extract the 'main' tree - i.e. the one with the CEO at the head:


def count_members(tree,id):
    """Count the nodes of the (sub)tree rooted at ``tree``, the root included.

    ``tree`` is a node dict whose "children" entry is either a list of node
    dicts or a falsy value (None / empty list) for a leaf. ``id`` is passed
    through unchanged so the caller can tell the results apart.

    Returns a dict of the form {'id': id, 'count': number_of_nodes}.
    """
    def subtree_size(node):
        # one for the node itself plus the size of each child's subtree
        return 1 + sum(subtree_size(child) for child in node["children"] or [])

    return {'id': id, 'count': subtree_size(tree)}
            

#Size of every remaining top-level tree, largest first
biggest_trees = [count_members(subtree, root_id) for root_id, subtree in new_tree.items()]
biggest_trees.sort(key=lambda entry: entry["count"], reverse=True)

#The tree with the most members is taken to be the main one
biggest_id = biggest_trees[0]["id"]
main_tree = new_tree[biggest_id]

In [17]:
#We also might want a flat version of the main tree
main_tree_nodes = []

def add_children(node):
    """Record every descendant of ``node`` in main_tree_nodes, depth-first.

    For each child a {"parent": <id of its parent>, "id": <child id>} entry is
    appended before that child's own descendants are visited. A node whose
    "children" entry is falsy (None / empty list) adds nothing.
    """
    for child in node["children"] or []:
        main_tree_nodes.append(dict(parent=node["id"], id=child["id"]))
        add_children(child)
            
#The root goes in first with no parent, followed by all of its descendants
main_tree_nodes.append(dict(parent=None, id=main_tree["id"]))
add_children(main_tree)

#Ids of everybody who is part of the main tree
main_tree_ids = [entry["id"] for entry in main_tree_nodes]

In [18]:
#Also need the data for the select/search box in the app
#
#One {"id", "text"} entry per person in the main tree who has a name; the text
#is the title-cased name followed by the title-cased job title (when present).
#The entries are sorted by that text.

#Set lookup instead of scanning the id list once per user
main_tree_id_set = set(main_tree_ids)

select_box = []
for u in u_data:
    #People outside the main tree, or without a name to display, get no entry
    if u["id"] not in main_tree_id_set or not u["full_name"]:
        continue

    text = u["full_name"].title()
    if u["job_title"]:
        text += " " + u["job_title"].title()

    select_box.append({"id": u["id"], "text": text})

select_box.sort(key=lambda x: x["text"])

In [20]:
#"tree" is the top-level tree stored under biggest_id; "select_box" is the list built for the search box
final_data = dict(tree=new_tree[biggest_id], select_box=select_box)

In [21]:
import json
#Write the combined structure out as a single JSON document
with open('../website/data/orgchart_datadelete.json', 'w') as outfile:
    outfile.write(json.dumps(final_data))