In [36]:
#Script to reformat methane data Top-Down file for two-level Sankey
#In TD approach, Sources = regions, Targets = methane sources
#Output: json file formatted for Sankey diagram
#Created: 30.05.2016
#Last modified: 31.05.2016 Read in orig file (no manual reformatting necessary)
# 02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
# 06.06.2016 Output json files (Sankey format) with min and max values, respectively
# 07.06.2016 Min/max values output only when mean != -99 or != 0
# 10.06.2016 New data files from Marielle
In [1]:
import numpy as np
import pandas as pd
import collections
import os
In [2]:
df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_10june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/Sankey_TD_2003-2012_june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/myeditsSankey_TD_2003-2012_june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/mySankey_TD_2003-2012_june2016.txt", header=1, sep="\;")
df_TD
Out[2]:
In [3]:
df_TD.rename(columns = {'proc':'stats'}, inplace = True)
df_TD.index.name = 'proc'
df_TD
Out[3]:
In [4]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_TD_piv = df_TD.pivot(columns='stats', index=df_TD.index)
df_TD_piv
Out[4]:
In [5]:
df_TD_piv.columns[0][0]
Out[5]:
In [6]:
df_TD_piv.columns[0][0] #Bor_NAme
df_TD_piv.columns[3][0] #contUSA
df_TD_piv.columns[6][0] #CentName
df_TD_piv.columns[9][0] #Trop_SAme
Out[6]:
In [7]:
df_TD_piv['Bor_NAme']['mean']
Out[7]:
In [8]:
df_TD_piv['Bor_NAme'].loc['Agriwast']['mean']
Out[8]:
In [9]:
df_TD_piv['Bor_NAme'].loc['BioBurBiof']['mean']
Out[9]:
In [10]:
#Store region names in list
numRegions = df_TD_piv.shape[1] / 3
idx = 0
sources = []
for num in range(0,numRegions):
sources.append(df_TD_piv.columns[idx][0])
idx = idx + 3
sources
Out[10]:
In [11]:
#EXCLUDE GLO COLUMN
sources = sources[:len(sources) -1]
In [12]:
sources
Out[12]:
In [13]:
#Get target list
targets = df_TD_piv.index.tolist()
targets
Out[13]:
In [14]:
nodes = sources + targets
nodes
Out[14]:
In [15]:
file = open('../data/Sankey_TD_2003-2012_10june2016_mean_noZeros.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
print source
for target in targets:
print target
#print df_TD_piv[source].loc[target]['mean']
value = df_TD_piv[source].loc[target]['mean']
if value != -99:
if value != 0:
print value
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [16]:
#Write json file for Sankey, MIN VALUES ONLY
file = open('../data/Sankey_TD_2003-2012_10june2016_min.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
#print source
for target in targets:
#print target
#print df_TD_piv[source].loc[target]['min']
meanValue = df_TD_piv[source].loc[target]['mean']
value = df_TD_piv[source].loc[target]['min']
if meanValue != -99:
if meanValue != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [17]:
#Write json file for Sankey, MAX VALUES ONLY
file = open('../data/Sankey_TD_2003-2012_10june2016_max.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
#print source
for target in targets:
#print target
#print df_TD_piv[source].loc[target]['max']
meanValue = df_TD_piv[source].loc[target]['mean']
value = df_TD_piv[source].loc[target]['max']
if meanValue != -99:
if meanValue != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [ ]: