In [36]:
#Script to reformat the methane Top-Down (TD) data file for a two-level Sankey diagram
#In the TD approach, Sankey sources = regions, Sankey targets = methane source categories

#Output: JSON file formatted for the Sankey diagram

#Created:           30.05.2016
#Last modified:     31.05.2016 Read in original file (no manual reformatting necessary)
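
In [ ]:
#For reference, the output JSON written by the last cell has the usual
#nodes/links layout used by d3-style Sankey diagrams. A minimal sketch of that
#structure (one node of each kind and one link shown; the "value" is a
#two-decimal string, matching the write loop at the end of the notebook):

sankey_sketch = {
    "nodes": [{"name": "Bor_NAme"}, {"name": "Wetlands"}],
    "links": [{"source": "Bor_NAme", "target": "Wetlands", "value": "13.00"}]
}
sankey_sketch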

In [1]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd

In [33]:
df_BU = pd.read_csv("Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_BU

In [58]:
df_TD = pd.read_csv("Sankey_TD_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_TD


Out[58]:
proc Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
Wetlands mean 13 8 2 47 6 11 21 13 2 2 4 6 31 2 69
Wetlands min 7 6 2 30 4 8 18 10 1 1 3 4 8 1 59
Wetlands max 21 9 2 61 7 14 23 15 2 2 7 8 41 2 75
OtherNat mean 2 2 1 7 1 6 5 2 2 2 4 2 6 3 51
OtherNat min 1 1 1 5 1 3 4 1 1 1 1 1 2 1 21
OtherNat max 2 4 1 8 2 7 11 3 2 1 1 1 9 2 47
Agriwast mean 3 17 5 21 6 12 7 5 4 16 30 25 25 20 96
Agriwast min 2 15 6 19 5 12 7 3 3 13 16 16 17 16 75
Agriwast max 3 23 7 24 8 14 8 7 5 19 38 28 32 24 7
Fossil mean 2 11 2 5 1 7 4 18 2 7 23 3 7 21 11
Fossil min 1 9 1 1 1 5 2 13 0 6 9 3 5 14 77
Fossil max 2 15 3 8 1 8 5 20 2 9 31 4 8 27 33
BioBurBiof mean 1 0 1 6 0 4 6 1 1 1 3 2 6 1 32
BioBurBiof min 1 0 0 3 0 4 4 1 0 0 2 2 3 1 29
BioBurBiof max 1 1 1 7 1 4 9 2 1 1 4 2 6 1 44

In [59]:
df_TD.rename(columns = {'proc':'stats'}, inplace = True)
df_TD.index.name = 'proc'
df_TD


Out[59]:
stats Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
proc
Wetlands mean 13 8 2 47 6 11 21 13 2 2 4 6 31 2 69
Wetlands min 7 6 2 30 4 8 18 10 1 1 3 4 8 1 59
Wetlands max 21 9 2 61 7 14 23 15 2 2 7 8 41 2 75
OtherNat mean 2 2 1 7 1 6 5 2 2 2 4 2 6 3 51
OtherNat min 1 1 1 5 1 3 4 1 1 1 1 1 2 1 21
OtherNat max 2 4 1 8 2 7 11 3 2 1 1 1 9 2 47
Agriwast mean 3 17 5 21 6 12 7 5 4 16 30 25 25 20 96
Agriwast min 2 15 6 19 5 12 7 3 3 13 16 16 17 16 75
Agriwast max 3 23 7 24 8 14 8 7 5 19 38 28 32 24 7
Fossil mean 2 11 2 5 1 7 4 18 2 7 23 3 7 21 11
Fossil min 1 9 1 1 1 5 2 13 0 6 9 3 5 14 77
Fossil max 2 15 3 8 1 8 5 20 2 9 31 4 8 27 33
BioBurBiof mean 1 0 1 6 0 4 6 1 1 1 3 2 6 1 32
BioBurBiof min 1 0 0 3 0 4 4 1 0 0 2 2 3 1 29
BioBurBiof max 1 1 1 7 1 4 9 2 1 1 4 2 6 1 44

In [60]:
# Alternative (commented out): create the dataframe from the Excel flux file;
# the plain-text file is read above instead

#xl_TD = pd.ExcelFile("Sankey_TD_2003-2012_25MAy2016.xls")
#df_TD = xl_TD.parse("Sankey_TD_2003-2012_25MAy2016",  skiprows=[0])
#df_TD.head(10)

In [61]:
#Pivot the table so that the "stats" values (mean, min, max) become three
#sub-columns under each region column, with "proc" as the index
df_TD_piv = df_TD.pivot(columns='stats', index=df_TD.index)
df_TD_piv


Out[61]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
Agriwast 3 3 2 23 17 15 7 5 6 24 ... 16 32 25 17 24 20 16 7 96 75
BioBurBiof 1 1 1 1 0 0 1 1 0 7 ... 2 6 6 3 1 1 1 44 32 29
Fossil 2 2 1 15 11 9 3 2 1 8 ... 3 8 7 5 27 21 14 33 11 77
OtherNat 2 2 1 4 2 1 1 1 1 8 ... 1 9 6 2 2 3 1 47 51 21
Wetlands 21 13 7 9 8 6 2 2 2 61 ... 4 41 31 8 2 2 1 75 69 59

5 rows × 45 columns
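
In [ ]:
#Aside (sketch): the 'mean' sub-columns can also be pulled out of the column
#MultiIndex in one step with a cross-section. df_TD_means is only an
#illustrative name; the rest of the notebook keeps using df_TD_piv.
#The level name 'stats' comes from the rename/pivot above (level=1 also works).
df_TD_means = df_TD_piv.xs('mean', axis=1, level='stats')
df_TD_means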


In [62]:
df_TD_piv.columns[0][0]


Out[62]:
'Bor_NAme'

In [63]:
df_TD_piv.columns[0][0] #Bor_NAme
df_TD_piv.columns[3][0] #contUSA
df_TD_piv.columns[6][0] #CentName
df_TD_piv.columns[9][0] #Trop_SAme


Out[63]:
'Trop_SAme'

In [51]:
df_TD_piv['Bor_NAme']['mean']


Out[51]:
proc
Agriwast       3
BioBurBiof     1
Fossil         2
OtherNat       2
Wetlands      13
Name: mean, dtype: int64

In [52]:
df_TD_piv['Bor_NAme'].loc['Agriwast']['mean']


Out[52]:
3

In [53]:
df_TD_piv['Bor_NAme'].loc['BioBurBiof']['mean']


Out[53]:
1
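
In [ ]:
#Equivalent single lookup using a (region, stat) column tuple instead of the
#chained indexing in the two cells above:
df_TD_piv.loc['Agriwast', ('Bor_NAme', 'mean')]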

In [54]:
#Store region names in a list: the pivoted columns come in groups of
#three stats (max, mean, min) per region
numRegions = df_TD_piv.shape[1] // 3

idx = 0
sources = []
for num in range(numRegions):
    sources.append(df_TD_piv.columns[idx][0])
    idx = idx + 3

sources


Out[54]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']
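
In [ ]:
#Sketch of an alternative: read the region names straight off the outer level
#of the column MultiIndex (order of first appearance is preserved, so this
#should match the sources list built above)
df_TD_piv.columns.get_level_values(0).unique().tolist()
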

In [55]:
#Get target list
targets = df_TD_piv.index.tolist()
targets


Out[55]:
['Agriwast', 'BioBurBiof', 'Fossil', 'OtherNat', 'Wetlands']

In [56]:
nodes = sources + targets
nodes


Out[56]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO',
 'Agriwast',
 'BioBurBiof',
 'Fossil',
 'OtherNat',
 'Wetlands']

In [57]:
# Write the Sankey JSON by hand: a "nodes" list followed by a "links" list
# (outfile instead of file, to avoid shadowing the Python 2 builtin)
outfile = open('Sankey_TD_2003-2012_25MAy2016_means.json', 'w')

outfile.write('{\n')
outfile.write('"nodes": [\n')
for node in nodes:
    outfile.write('{"name": "%s"},\n' %(node))
# remove the trailing ',\n' written after the last node
outfile.seek(-2, os.SEEK_END)
outfile.truncate()
outfile.write('\n],\n')

outfile.write('"links": [\n')

for source in sources:
    print source
    for target in targets:
        print target
        value = df_TD_piv[source].loc[target]['mean']
        print value
        outfile.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove the trailing ',\n' written after the last link
outfile.seek(-2, os.SEEK_END)
outfile.truncate()
outfile.write('\n]\n')
outfile.write('}\n')

outfile.close()


Bor_NAme
Agriwast
3
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
13
contUSA
Agriwast
17
BioBurBiof
0
Fossil
11
OtherNat
2
Wetlands
8
Cent_NAme
Agriwast
5
BioBurBiof
1
Fossil
2
OtherNat
1
Wetlands
2
Trop_SAme
Agriwast
21
BioBurBiof
6
Fossil
5
OtherNat
7
Wetlands
47
Temp_SAme
Agriwast
6
BioBurBiof
0
Fossil
1
OtherNat
1
Wetlands
6
NAfr
Agriwast
12
BioBurBiof
4
Fossil
7
OtherNat
6
Wetlands
11
SAfr
Agriwast
7
BioBurBiof
6
Fossil
4
OtherNat
5
Wetlands
21
Russia
Agriwast
5
BioBurBiof
1
Fossil
18
OtherNat
2
Wetlands
13
Oceania
Agriwast
4
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
2
Europe
Agriwast
16
BioBurBiof
1
Fossil
7
OtherNat
2
Wetlands
2
China
Agriwast
30
BioBurBiof
3
Fossil
23
OtherNat
4
Wetlands
4
India
Agriwast
25
BioBurBiof
2
Fossil
3
OtherNat
2
Wetlands
6
SE_Asia
Agriwast
25
BioBurBiof
6
Fossil
7
OtherNat
6
Wetlands
31
Temp_Eurasia_Japan
Agriwast
20
BioBurBiof
1
Fossil
21
OtherNat
3
Wetlands
2
GLO
Agriwast
96
BioBurBiof
32
Fossil
11
OtherNat
51
Wetlands
69
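
In [ ]:
#Sketch of an alternative that avoids the seek/truncate comma handling above:
#build the nodes/links structure as Python lists of dicts and let the json
#module serialise it. Uses the same nodes, sources, targets and df_TD_piv as
#above; the filename is illustrative so the file written above is not touched.
import json

sankey = {
    "nodes": [{"name": node} for node in nodes],
    "links": [{"source": source,
               "target": target,
               "value": "%.2f" % float(df_TD_piv[source].loc[target]['mean'])}
              for source in sources
              for target in targets]
}

with open('Sankey_TD_means_jsonmodule.json', 'w') as f:
    json.dump(sankey, f, indent=1)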

In [ ]: