In [36]:
#Script to reformat methane data Top-Down file for two-level Sankey
#In TD approach, Sources = regions, Targets = methane sources
#Output: json file formatted for Sankey diagram
#Created: 30.05.2016
#Last modified: 31.05.2016 Read in orig file (no manual reformatting necessary)
In [1]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd
In [33]:
df_BU = pd.read_csv("Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_BU
In [58]:
df_TD = pd.read_csv("Sankey_TD_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_TD
Out[58]:
In [59]:
df_TD.rename(columns = {'proc':'stats'}, inplace = True)
df_TD.index.name = 'proc'
df_TD
Out[59]:
In [60]:
# Create dataframe from flux file
#xl_TD = pd.ExcelFile("Sankey_TD_2003-2012_25MAy2016.xls")
#df_TD = xl_TD.parse("Sankey_TD_2003-2012_25MAy2016", skiprows=[0])
#df_TD.head(10)
In [61]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_TD_piv = df_TD.pivot(columns='stats', index=df_TD.index)
df_TD_piv
Out[61]:
In [62]:
df_TD_piv.columns[0][0]
Out[62]:
In [63]:
df_TD_piv.columns[0][0] #Bor_NAme
df_TD_piv.columns[3][0] #contUSA
df_TD_piv.columns[6][0] #CentName
df_TD_piv.columns[9][0] #Trop_SAme
Out[63]:
In [51]:
df_TD_piv['Bor_NAme']['mean']
Out[51]:
In [52]:
df_TD_piv['Bor_NAme'].loc['Agriwast']['mean']
Out[52]:
In [53]:
df_TD_piv['Bor_NAme'].loc['BioBurBiof']['mean']
Out[53]:
In [54]:
#Store region names in list
numRegions = df_TD_piv.shape[1] / 3
idx = 0
sources = []
for num in range(0,numRegions):
sources.append(df_TD_piv.columns[idx][0])
idx = idx + 3
sources
Out[54]:
In [55]:
#Get target list
targets = df_TD_piv.index.tolist()
targets
Out[55]:
In [56]:
nodes = sources + targets
nodes
Out[56]:
In [57]:
file = open('Sankey_TD_2003-2012_25MAy2016_means.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
print source
for target in targets:
print target
print df_TD_piv[source].loc[target]['mean']
value = df_TD_piv[source].loc[target]['mean']
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [ ]: