In [36]:
#Script to reformat methane data Top-Down file for two-level Sankey
#In BU approach, Sources = methane sources, Targets = regions
#Output: json file formatted for Sankey diagram
#Created: 31.05.2016
#Last modified: 02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
# 03.06.2016 Add "Not affected" category; Exclude 0 and -99 values
# 06.06.2016 Output json files (Sankey format) with min and max values, respectively
# 07.06.2016 Min/max values output only when mean != -99 or != 0
In [1]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd
In [21]:
df_BU = pd.read_csv("../data/Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_BU
Out[21]:
In [22]:
df_BU.rename(columns = {'proc':'stats'}, inplace = True)
df_BU.index.name = 'proc'
df_BU
Out[22]:
In [23]:
#Add "Not affected" column and set values to 0
df_BU['Not affected'] = 0
df_BU
Out[23]:
In [24]:
#Copy GLO values for OtherNat into "Not affected" col
df_BU['Not affected']['OtherNat'] = df_BU['GLO']['OtherNat']
In [25]:
df_BU
Out[25]:
In [26]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_BU_piv = df_BU.pivot(columns='stats', index=df_BU.index)
df_BU_piv
Out[26]:
In [27]:
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #CentName
df_BU_piv.columns[9][0] #Trop_SAme
Out[27]:
In [28]:
df_BU_piv['Bor_NAme']['mean']
Out[28]:
In [29]:
df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']
Out[29]:
In [30]:
df_BU_piv['Bor_NAme'].loc['BioBurBiof']['mean']
Out[30]:
In [31]:
#Store region names in list
numRegions = df_BU_piv.shape[1] / 3
idx = 0
targets = []
for num in range(0,numRegions):
if df_BU_piv.columns[idx][0] != "GLO" : #exclude GLO column
targets.append(df_BU_piv.columns[idx][0])
idx = idx + 3
targets
Out[31]:
In [32]:
#Get target list
sources = df_BU_piv.index.tolist()
sources
Out[32]:
In [33]:
nodes = sources + targets
nodes
Out[33]:
In [34]:
df_BU_piv
Out[34]:
In [35]:
#Replace -99 values with 0
#df_BU_piv = df_BU_piv.replace(-99, 1)
#df_BU_piv
#Replace -99 and 0 values with -1
#df_BU_piv = df_BU_piv.replace(-99, -1)
#df_BU_piv = df_BU_piv.replace(0, -1)
df_BU_piv
Out[35]:
In [47]:
#Write json file for Sankey, MEAN VALUES ONLY
#GLO omitted
file = open('../data/Sankey_BU_2003-2012_25MAy2016_mean_noZeros.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
print source
for target in targets:
print target
#print df_BU_piv[target].loc[source]['mean']
value = df_BU_piv[target].loc[source]['mean']
#print "value:"
#print value
if value != -99:
if value != 0:
print value
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [50]:
#Write json file for Sankey, MIN VALUES ONLY
#GLO omitted
file = open('../data/Sankey_BU_2003-2012_25MAy2016_min.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
#print source
for target in targets:
#print target
#print df_BU_piv[target].loc[source]['min']
meanValue = df_BU_piv[target].loc[source]['mean']
value = df_BU_piv[target].loc[source]['min']
if meanValue != -99:
if meanValue != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [51]:
#Write json file for Sankey, MAX VALUES ONLY
#GLO omitted
file = open('../data/Sankey_BU_2003-2012_25MAy2016_max.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
#print source
for target in targets:
#print target
#print df_BU_piv[target].loc[source]['max']
meanValue = df_BU_piv[target].loc[source]['mean']
value = df_BU_piv[target].loc[source]['max']
if meanValue != -99:
if meanValue != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [20]:
#Write tsv file with min/max values
#file_name = "../data/statsBU.tsv"
#df_BU_piv.to_csv(file_name, sep='\t', encoding='utf-8')
#df_BU_piv
In [39]:
#junk = pd.read_csv("../data/Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
#junk.rename(columns = {'proc':'stats'}, inplace = True)
In [40]:
#Write tsv file with min/max values
#file_name = "../data/junk.tsv"
#junk.to_csv(file_name, sep='\t', encoding='utf-8')
In [ ]: