In [36]:
#Script to reformat methane data Top-Down file for two-level Sankey
#In BU approach, Sources = methane sources, Targets = regions

#Output: json file formatted for Sankey diagram

#Created:     31.05.2016

In [2]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd

In [3]:
df_BU = pd.read_csv("Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_BU


Out[3]:
proc Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
Wetlands mean 32 13 2 42 4 8 19 14 3 4 5 6 29 3 185
Wetlands min 15 6 1 19 1 3 15 5 1 1 1 1 18 1 153
Wetlands max 61 23 4 59 7 16 22 26 6 7 9 13 35 6 227
OtherNat mean -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 199
OtherNat min -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 104
OtherNat max -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 297
Agriwast mean 3 17 5 21 6 14 6 5 5 17 30 21 22 20 195
Agriwast min 2 15 2 18 5 12 5 5 5 15 23 16 18 17 178
Agriwast max 3 23 6 23 6 18 7 5 5 18 36 24 25 22 206
Fossil mean 2 11 2 8 1 9 4 20 2 6 24 3 6 22 121
Fossil min 1 9 0 3 1 7 3 18 1 3 15 2 5 20 114
Fossil max 3 16 3 19 1 14 5 26 2 8 31 4 7 26 133
BioBurBiof mean 1 1 1 3 0 4 6 2 1 1 3 2 5 1 30
BioBurBiof min 0 0 0 1 0 3 4 0 0 0 2 2 4 1 27
BioBurBiof max 1 2 1 6 1 5 10 4 1 1 4 3 5 2 35

In [4]:
df_BU.rename(columns = {'proc':'stats'}, inplace = True)
df_BU.index.name = 'proc'
df_BU


Out[4]:
stats Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
proc
Wetlands mean 32 13 2 42 4 8 19 14 3 4 5 6 29 3 185
Wetlands min 15 6 1 19 1 3 15 5 1 1 1 1 18 1 153
Wetlands max 61 23 4 59 7 16 22 26 6 7 9 13 35 6 227
OtherNat mean -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 199
OtherNat min -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 104
OtherNat max -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 297
Agriwast mean 3 17 5 21 6 14 6 5 5 17 30 21 22 20 195
Agriwast min 2 15 2 18 5 12 5 5 5 15 23 16 18 17 178
Agriwast max 3 23 6 23 6 18 7 5 5 18 36 24 25 22 206
Fossil mean 2 11 2 8 1 9 4 20 2 6 24 3 6 22 121
Fossil min 1 9 0 3 1 7 3 18 1 3 15 2 5 20 114
Fossil max 3 16 3 19 1 14 5 26 2 8 31 4 7 26 133
BioBurBiof mean 1 1 1 3 0 4 6 2 1 1 3 2 5 1 30
BioBurBiof min 0 0 0 1 0 3 4 0 0 0 2 2 4 1 27
BioBurBiof max 1 2 1 6 1 5 10 4 1 1 4 3 5 2 35

In [5]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_BU_piv = df_BU.pivot(columns='stats', index=df_BU.index)
df_BU_piv


Out[5]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
Agriwast 3 3 2 23 17 15 6 5 2 23 ... 16 25 22 18 22 20 17 206 195 178
BioBurBiof 1 1 0 2 1 0 1 1 0 6 ... 2 5 5 4 2 1 1 35 30 27
Fossil 3 2 1 16 11 9 3 2 0 19 ... 2 7 6 5 26 22 20 133 121 114
OtherNat -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 ... -99 -99 -99 -99 -99 -99 -99 297 199 104
Wetlands 61 32 15 23 13 6 4 2 1 59 ... 1 35 29 18 6 3 1 227 185 153

5 rows × 45 columns


In [7]:
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #CentName
df_BU_piv.columns[9][0] #Trop_SAme


Out[7]:
'Trop_SAme'

In [8]:
df_BU_piv['Bor_NAme']['mean']


Out[8]:
proc
Agriwast       3
BioBurBiof     1
Fossil         2
OtherNat     -99
Wetlands      32
Name: mean, dtype: int64

In [9]:
df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']


Out[9]:
3

In [10]:
df_BU_piv['Bor_NAme'].loc['BioBurBiof']['mean']


Out[10]:
1

In [12]:
#Store region names in list
numRegions = df_BU_piv.shape[1] / 3

idx = 0
targets = []
for num in range(0,numRegions):
    targets.append(df_BU_piv.columns[idx][0])
    idx = idx + 3
   
targets


Out[12]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']

In [14]:
#Get target list
sources = df_BU_piv.index.tolist()
sources


Out[14]:
['Agriwast', 'BioBurBiof', 'Fossil', 'OtherNat', 'Wetlands']

In [15]:
nodes = sources + targets
nodes


Out[15]:
['Agriwast',
 'BioBurBiof',
 'Fossil',
 'OtherNat',
 'Wetlands',
 'Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']

In [57]:
file = open('Sankey_BU_2003-2012_25MAy2016.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    print source
    for target in targets:
        print target
        print df_BU_piv[source].loc[target]['mean']
        value = df_BU_piv[source].loc[target]['mean']
        file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()


Bor_NAme
Agriwast
3
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
13
contUSA
Agriwast
17
BioBurBiof
0
Fossil
11
OtherNat
2
Wetlands
8
Cent_NAme
Agriwast
5
BioBurBiof
1
Fossil
2
OtherNat
1
Wetlands
2
Trop_SAme
Agriwast
21
BioBurBiof
6
Fossil
5
OtherNat
7
Wetlands
47
Temp_SAme
Agriwast
6
BioBurBiof
0
Fossil
1
OtherNat
1
Wetlands
6
NAfr
Agriwast
12
BioBurBiof
4
Fossil
7
OtherNat
6
Wetlands
11
SAfr
Agriwast
7
BioBurBiof
6
Fossil
4
OtherNat
5
Wetlands
21
Russia
Agriwast
5
BioBurBiof
1
Fossil
18
OtherNat
2
Wetlands
13
Oceania
Agriwast
4
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
2
Europe
Agriwast
16
BioBurBiof
1
Fossil
7
OtherNat
2
Wetlands
2
China
Agriwast
30
BioBurBiof
3
Fossil
23
OtherNat
4
Wetlands
4
India
Agriwast
25
BioBurBiof
2
Fossil
3
OtherNat
2
Wetlands
6
SE_Asia
Agriwast
25
BioBurBiof
6
Fossil
7
OtherNat
6
Wetlands
31
Temp_Eurasia_Japan
Agriwast
20
BioBurBiof
1
Fossil
21
OtherNat
3
Wetlands
2
GLO
Agriwast
96
BioBurBiof
32
Fossil
11
OtherNat
51
Wetlands
69

In [ ]: