In [36]:
#Script to reformat methane data Bottom-Up (BU) file for two-level Sankey
#In BU approach, Sources = methane sources, Targets = regions
#Output: json file formatted for Sankey diagram
#Created: 31.05.2016
In [37]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd
In [79]:
# Load the whitespace-delimited table. header=1 takes the column names from
# row 1 (0-based) of the file, i.e. its second line.
# sep=r"\s+" is the documented replacement for the deprecated
# delim_whitespace=True flag.
df_BU = pd.read_csv("Sankey_BU_2003-2012_25MAy2016.txt", header=1, sep=r"\s+")
df_BU
Out[79]:
In [80]:
# Use 'proc' as the name of the row index and rename the column that was
# labelled 'proc' to 'stats'. Both changes are applied to df_BU in place.
df_BU.index.name = 'proc'
df_BU.rename(columns=dict(proc='stats'), inplace=True)
df_BU
Out[80]:
In [81]:
#Pivot table so that the "stats" values (mean, min, max) become three columns
#under each region column and "proc" stays as the row index.
# No `index` argument is passed: pivot then keeps the frame's existing index
# (named 'proc') as the row labels. `index=` is meant for column labels, not
# for an Index object.
df_BU_piv = df_BU.pivot(columns='stats')
df_BU_piv
Out[81]:
In [82]:
# Peek at the first element (top-level label) of the column tuples at
# positions 0, 3, 6 and 9. Only the last expression's value is displayed
# as the cell output.
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #CentName
df_BU_piv.columns[9][0] #Trop_SAme
Out[82]:
In [83]:
# Spot-check: the 'mean' column under the 'Bor_NAme' column group, for every row.
df_BU_piv['Bor_NAme']['mean']
Out[83]:
In [84]:
# Spot-check a single cell: the 'mean' entry of row 'Agriwast' under 'Bor_NAme'.
df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']
Out[84]:
In [85]:
# Spot-check a single cell: the 'mean' entry of row 'BioBurBiof' under 'Bor_NAme'.
df_BU_piv['Bor_NAme'].loc['BioBurBiof']['mean']
Out[85]:
In [86]:
#Store region names in list
# The pivoted table holds three columns per region (one per "stats" value),
# so the region label is the first element of every third column tuple.
# Floor division keeps numRegions an int, which range() requires.
numRegions = df_BU_piv.shape[1] // 3
targets = [df_BU_piv.columns[3 * num][0] for num in range(numRegions)]
targets
Out[86]:
In [87]:
#Get source list: the row labels of the pivoted table
sources = list(df_BU_piv.index)
sources
Out[87]:
In [88]:
# Node list for the Sankey: every source name first, then every target name.
nodes = list(sources)
nodes.extend(targets)
nodes
Out[88]:
In [89]:
# Display the pivoted table as it stands at this point in the notebook.
df_BU_piv
Out[89]:
In [90]:
#Swap every -99 entry for 0
# NOTE(review): -99 is presumably a missing-data marker in the input file -- confirm.
df_BU_piv = df_BU_piv.replace(to_replace=-99, value=0)
df_BU_piv
Out[90]:
In [77]:
#Replace -99 values with 0
#df = df_BU_piv
#df.replace(-99, 0)
#######df.loc["OtherNat"] = 0
#df
In [91]:
# Write the Sankey JSON file: a "nodes" array with one entry per name in
# `nodes`, and a "links" array with one entry per (source, target) pair whose
# value is that pair's 'mean' entry in df_BU_piv, formatted to two decimals.
# NOTE(review): names are interpolated without JSON escaping and "value" is
# emitted as a quoted string; both are kept exactly as before so the output
# format does not change -- confirm what the Sankey consumer expects.
node_entries = ['{"name": "%s"}' % node for node in nodes]

link_entries = []
for source in sources:
    for target in targets:
        value = df_BU_piv[target].loc[source]['mean']
        link_entries.append(
            '{"source": "%s", "target": "%s", "value": "%.2f"}'
            % (source, target, float(value))
        )

# Joining with ',\n' places separators only between entries, so there is no
# trailing comma to trim afterwards and empty lists are handled as well.
# The context manager closes the file even if a write raises.
with open('Sankey_BU_2003-2012_25MAy2016.json', 'w') as json_file:
    json_file.write('{\n')
    json_file.write('"nodes": [\n')
    json_file.write(',\n'.join(node_entries))
    json_file.write('\n],\n')
    json_file.write('"links": [\n')
    json_file.write(',\n'.join(link_entries))
    json_file.write('\n]\n')
    json_file.write('}\n')
In [ ]: