In [36]:
#Script to reformat the methane Bottom-Up (BU) data file for a two-level Sankey
#In the BU approach, Sources = methane sources, Targets = regions

#Output: json file formatted for Sankey diagram

#Created:     31.05.2016
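
For orientation, the file written in the final cell follows a simple node/link structure (nodes keyed by name, link values written as formatted strings). A minimal hand-built example of the target format, assuming the downstream Sankey code resolves links by node name:

#Illustrative only: two nodes and one link in the output format
example = {
    "nodes": [{"name": "Wetlands"}, {"name": "Europe"}],
    "links": [{"source": "Wetlands", "target": "Europe", "value": "4.00"}]
}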

In [37]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd

In [79]:
df_BU = pd.read_csv("Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_BU


Out[79]:
proc Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
Wetlands mean 32 13 2 42 4 8 19 14 3 4 5 6 29 3 185
Wetlands min 15 6 1 19 1 3 15 5 1 1 1 1 18 1 153
Wetlands max 61 23 4 59 7 16 22 26 6 7 9 13 35 6 227
OtherNat mean -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 199
OtherNat min -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 104
OtherNat max -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 297
Agriwast mean 3 17 5 21 6 14 6 5 5 17 30 21 22 20 195
Agriwast min 2 15 2 18 5 12 5 5 5 15 23 16 18 17 178
Agriwast max 3 23 6 23 6 18 7 5 5 18 36 24 25 22 206
Fossil mean 2 11 2 8 1 9 4 20 2 6 24 3 6 22 121
Fossil min 1 9 0 3 1 7 3 18 1 3 15 2 5 20 114
Fossil max 3 16 3 19 1 14 5 26 2 8 31 4 7 26 133
BioBurBiof mean 1 1 1 3 0 4 6 2 1 1 3 2 5 1 30
BioBurBiof min 0 0 0 1 0 3 4 0 0 0 2 2 4 1 27
BioBurBiof max 1 2 1 6 1 5 10 4 1 1 4 3 5 2 35

In [80]:
df_BU.rename(columns = {'proc':'stats'}, inplace = True)
df_BU.index.name = 'proc'
df_BU


Out[80]:
stats Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
proc
Wetlands mean 32 13 2 42 4 8 19 14 3 4 5 6 29 3 185
Wetlands min 15 6 1 19 1 3 15 5 1 1 1 1 18 1 153
Wetlands max 61 23 4 59 7 16 22 26 6 7 9 13 35 6 227
OtherNat mean -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 199
OtherNat min -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 104
OtherNat max -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 297
Agriwast mean 3 17 5 21 6 14 6 5 5 17 30 21 22 20 195
Agriwast min 2 15 2 18 5 12 5 5 5 15 23 16 18 17 178
Agriwast max 3 23 6 23 6 18 7 5 5 18 36 24 25 22 206
Fossil mean 2 11 2 8 1 9 4 20 2 6 24 3 6 22 121
Fossil min 1 9 0 3 1 7 3 18 1 3 15 2 5 20 114
Fossil max 3 16 3 19 1 14 5 26 2 8 31 4 7 26 133
BioBurBiof mean 1 1 1 3 0 4 6 2 1 1 3 2 5 1 30
BioBurBiof min 0 0 0 1 0 3 4 0 0 0 2 2 4 1 27
BioBurBiof max 1 2 1 6 1 5 10 4 1 1 4 3 5 2 35

In [81]:
#Pivot the table so that the "stats" values (mean, min, max) become three sub-columns under each region column
#and "proc" becomes the index
df_BU_piv = df_BU.pivot(columns='stats', index=df_BU.index)
df_BU_piv


Out[81]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
Agriwast 3 3 2 23 17 15 6 5 2 23 ... 16 25 22 18 22 20 17 206 195 178
BioBurBiof 1 1 0 2 1 0 1 1 0 6 ... 2 5 5 4 2 1 1 35 30 27
Fossil 3 2 1 16 11 9 3 2 0 19 ... 2 7 6 5 26 22 20 133 121 114
OtherNat -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 ... -99 -99 -99 -99 -99 -99 -99 297 199 104
Wetlands 61 32 15 23 13 6 4 2 1 59 ... 1 35 29 18 6 3 1 227 185 153

5 rows × 45 columns


In [82]:
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #Cent_NAme
df_BU_piv.columns[9][0] #Trop_SAme


Out[82]:
'Trop_SAme'

In [83]:
df_BU_piv['Bor_NAme']['mean']


Out[83]:
proc
Agriwast       3
BioBurBiof     1
Fossil         2
OtherNat     -99
Wetlands      32
Name: mean, dtype: int64

In [84]:
df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']


Out[84]:
3

In [85]:
df_BU_piv['Bor_NAme'].loc['BioBurBiof']['mean']


Out[85]:
1
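
The lookups above pull one region/stat pair at a time. To grab the 'mean' column for every region in one call, a cross-section on the column MultiIndex works as well (a sketch, assuming the pivoted frame keeps the two-level region/stats columns):

#Illustrative: all regional means as a 5 x 15 frame indexed by proc
df_BU_means = df_BU_piv.xs('mean', axis=1, level='stats')
df_BU_means['Bor_NAme']  #same values as df_BU_piv['Bor_NAme']['mean']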

In [86]:
#Store region names in a list (three stats columns per region)
numRegions = df_BU_piv.shape[1] // 3

idx = 0
targets = []
for num in range(0,numRegions):
    targets.append(df_BU_piv.columns[idx][0])
    idx = idx + 3
   
targets


Out[86]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']
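
Stepping through the columns in strides of three works but relies on the column order. An equivalent way is to take the unique values of the first column level directly (a sketch using standard pandas index methods):

#Illustrative alternative: unique() keeps the order of first appearance
targets_alt = df_BU_piv.columns.get_level_values(0).unique().tolist()
#targets_alt should equal the targets list built above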

In [87]:
#Get source list (emission processes)
sources = df_BU_piv.index.tolist()
sources


Out[87]:
['Agriwast', 'BioBurBiof', 'Fossil', 'OtherNat', 'Wetlands']

In [88]:
nodes = sources + targets
nodes


Out[88]:
['Agriwast',
 'BioBurBiof',
 'Fossil',
 'OtherNat',
 'Wetlands',
 'Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']

In [89]:
df_BU_piv


Out[89]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
Agriwast 3 3 2 23 17 15 6 5 2 23 ... 16 25 22 18 22 20 17 206 195 178
BioBurBiof 1 1 0 2 1 0 1 1 0 6 ... 2 5 5 4 2 1 1 35 30 27
Fossil 3 2 1 16 11 9 3 2 0 19 ... 2 7 6 5 26 22 20 133 121 114
OtherNat -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 ... -99 -99 -99 -99 -99 -99 -99 297 199 104
Wetlands 61 32 15 23 13 6 4 2 1 59 ... 1 35 29 18 6 3 1 227 185 153

5 rows × 45 columns


In [90]:
#Replace -99 values with 0
df_BU_piv = df_BU_piv.replace(-99, 0)
df_BU_piv


Out[90]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
Agriwast 3 3 2 23 17 15 6 5 2 23 ... 16 25 22 18 22 20 17 206 195 178
BioBurBiof 1 1 0 2 1 0 1 1 0 6 ... 2 5 5 4 2 1 1 35 30 27
Fossil 3 2 1 16 11 9 3 2 0 19 ... 2 7 6 5 26 22 20 133 121 114
OtherNat 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 297 199 104
Wetlands 61 32 15 23 13 6 4 2 1 59 ... 1 35 29 18 6 3 1 227 185 153

5 rows × 45 columns


In [77]:
#Earlier attempt at zeroing the -99 values, superseded by the .replace(-99, 0) call above
#df = df_BU_piv
#df.replace(-99, 0)
#df.loc["OtherNat"] = 0
#df

In [91]:
#Write the Sankey JSON by hand: a "nodes" list followed by a "links" list
outfile = open('Sankey_BU_2003-2012_25MAy2016.json', 'w')

outfile.write('{\n')
outfile.write('"nodes": [\n')
for node in nodes:
    outfile.write('{"name": "%s"},\n' %(node))
# remove the trailing ",\n" after the last node entry
outfile.seek(-2, os.SEEK_END)
outfile.truncate()
outfile.write('\n],\n')

outfile.write('"links": [\n')

for source in sources:
    print source
    for target in targets:
        print target
        print df_BU_piv[target].loc[source]['mean']
        value = df_BU_piv[target].loc[source]['mean']
        outfile.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove the trailing ",\n" after the last link entry
outfile.seek(-2, os.SEEK_END)
outfile.truncate()
outfile.write('\n]\n')
outfile.write('}\n')

outfile.close()


Agriwast
Bor_NAme
3
contUSA
17
Cent_NAme
5
Trop_SAme
21
Temp_SAme
6
NAfr
14
SAfr
6
Russia
5
Oceania
5
Europe
17
China
30
India
21
SE_Asia
22
Temp_Eurasia_Japan
20
GLO
195
BioBurBiof
Bor_NAme
1
contUSA
1
Cent_NAme
1
Trop_SAme
3
Temp_SAme
0
NAfr
4
SAfr
6
Russia
2
Oceania
1
Europe
1
China
3
India
2
SE_Asia
5
Temp_Eurasia_Japan
1
GLO
30
Fossil
Bor_NAme
2
contUSA
11
Cent_NAme
2
Trop_SAme
8
Temp_SAme
1
NAfr
9
SAfr
4
Russia
20
Oceania
2
Europe
6
China
24
India
3
SE_Asia
6
Temp_Eurasia_Japan
22
GLO
121
OtherNat
Bor_NAme
0
contUSA
0
Cent_NAme
0
Trop_SAme
0
Temp_SAme
0
NAfr
0
SAfr
0
Russia
0
Oceania
0
Europe
0
China
0
India
0
SE_Asia
0
Temp_Eurasia_Japan
0
GLO
199
Wetlands
Bor_NAme
32
contUSA
13
Cent_NAme
2
Trop_SAme
42
Temp_SAme
4
NAfr
8
SAfr
19
Russia
14
Oceania
3
Europe
4
China
5
India
6
SE_Asia
29
Temp_Eurasia_Japan
3
GLO
185
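
The seek/truncate calls above strip the trailing comma after the last entry by hand. An equivalent file (same nodes and links, different whitespace) can also be produced with the json module, which avoids that bookkeeping (a sketch reusing the nodes, sources, targets and df_BU_piv built above):

import json

#Illustrative alternative: build the structure in memory, then dump it once
sankey = {
    "nodes": [{"name": node} for node in nodes],
    "links": [{"source": source, "target": target,
               "value": "%.2f" % float(df_BU_piv[target].loc[source]['mean'])}
              for source in sources for target in targets]
}

with open('Sankey_BU_2003-2012_25MAy2016.json', 'w') as f:
    json.dump(sankey, f, indent=1)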

In [ ]: