In [36]:
#Script to reformat the methane Top-Down (TD) data file for a two-level Sankey diagram
#In the TD approach, Sankey sources = regions and Sankey targets = methane source categories

#Output: json file formatted for Sankey diagram

#Created:       30.05.2016
#Last modified: 31.05.2016 Read in orig file (no manual reformatting necessary)
#               02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
#               06.06.2016 Output json files (Sankey format) with min and max values, respectively
#               07.06.2016 Min/max values output only when mean != -99 and mean != 0
#               10.06.2016 New data files from Marielle

In [1]:
import numpy as np
import pandas as pd
import collections
import os

In [2]:
df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_10june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/Sankey_TD_2003-2012_june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/myeditsSankey_TD_2003-2012_june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/mySankey_TD_2003-2012_june2016.txt", header=1, sep="\;")
df_TD


Out[2]:
proc Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
Wetlands mean 13 9 2 44 8 12 20 13 3 2 5 6 27 3 167
Wetlands min 7 6 1 30 4 8 11 10 1 1 3 0 8 1 127
Wetlands max 21 13 3 61 10 14 29 16 2 4 7 10 41 5 202
OtherNat mean 2 3 1 9 2 6 6 2 3 3 6 4 9 4 64
OtherNat min 0 1 0 5 1 3 4 1 1 0 1 1 2 1 21
OtherNat max 4 9 2 22 2 11 11 4 3 2 16 12 26 13 132
Agriwast mean 3 18 5 21 6 12 7 5 4 15 27 25 24 19 188
Agriwast min 2 16 2 19 5 11 7 3 3 9 16 14 12 9 115
Agriwast max 4 23 8 27 8 15 9 7 5 19 37 43 32 24 243
Fossil mean 2 12 2 4 1 7 4 17 1 7 18 3 8 20 105
Fossil min 1 7 0 1 0 3 2 11 0 5 9 1 5 14 77
Fossil max 4 18 3 8 1 8 5 24 2 9 30 4 14 27 133
BioBurBiof mean 1 0 1 7 0 5 6 1 1 1 3 2 6 1 34
BioBurBiof min 0 0 0 3 0 2 4 1 0 0 0 0 3 0 15
BioBurBiof max 1 1 1 15 1 6 9 2 1 1 4 3 8 1 53

In [3]:
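#The header row lists one name fewer than the data rows, so pandas used the process names
#as the (unnamed) index and assigned the 'proc' label to the mean/min/max column;
#rename that column to 'stats' and label the index 'proc'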
df_TD.rename(columns = {'proc':'stats'}, inplace = True)
df_TD.index.name = 'proc'
df_TD


Out[3]:
stats Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
proc
Wetlands mean 13 9 2 44 8 12 20 13 3 2 5 6 27 3 167
Wetlands min 7 6 1 30 4 8 11 10 1 1 3 0 8 1 127
Wetlands max 21 13 3 61 10 14 29 16 2 4 7 10 41 5 202
OtherNat mean 2 3 1 9 2 6 6 2 3 3 6 4 9 4 64
OtherNat min 0 1 0 5 1 3 4 1 1 0 1 1 2 1 21
OtherNat max 4 9 2 22 2 11 11 4 3 2 16 12 26 13 132
Agriwast mean 3 18 5 21 6 12 7 5 4 15 27 25 24 19 188
Agriwast min 2 16 2 19 5 11 7 3 3 9 16 14 12 9 115
Agriwast max 4 23 8 27 8 15 9 7 5 19 37 43 32 24 243
Fossil mean 2 12 2 4 1 7 4 17 1 7 18 3 8 20 105
Fossil min 1 7 0 1 0 3 2 11 0 5 9 1 5 14 77
Fossil max 4 18 3 8 1 8 5 24 2 9 30 4 14 27 133
BioBurBiof mean 1 0 1 7 0 5 6 1 1 1 3 2 6 1 34
BioBurBiof min 0 0 0 3 0 2 4 1 0 0 0 0 3 0 15
BioBurBiof max 1 1 1 15 1 6 9 2 1 1 4 3 8 1 53

In [4]:
#Pivot the table so that the "stats" values (max, mean, min) become three sub-columns
#under each region column and "proc" becomes the index
df_TD_piv = df_TD.pivot(columns='stats', index=df_TD.index)
df_TD_piv


Out[4]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
Agriwast 4 3 2 23 18 16 8 5 2 27 ... 14 32 24 12 24 19 9 243 188 115
BioBurBiof 1 1 0 1 0 0 1 1 0 15 ... 0 8 6 3 1 1 0 53 34 15
Fossil 4 2 1 18 12 7 3 2 0 8 ... 1 14 8 5 27 20 14 133 105 77
OtherNat 4 2 0 9 3 1 2 1 0 22 ... 1 26 9 2 13 4 1 132 64 21
Wetlands 21 13 7 13 9 6 3 2 1 61 ... 0 41 27 8 5 3 1 202 167 127

5 rows × 45 columns


In [5]:
df_TD_piv.columns[0][0]


Out[5]:
'Bor_NAme'

In [6]:
df_TD_piv.columns[0][0] #Bor_NAme
df_TD_piv.columns[3][0] #contUSA
df_TD_piv.columns[6][0] #Cent_NAme
df_TD_piv.columns[9][0] #Trop_SAme


Out[6]:
'Trop_SAme'

In [7]:
df_TD_piv['Bor_NAme']['mean']


Out[7]:
proc
Agriwast       3
BioBurBiof     1
Fossil         2
OtherNat       2
Wetlands      13
Name: mean, dtype: int64

In [8]:
df_TD_piv['Bor_NAme'].loc['Agriwast']['mean']


Out[8]:
3

In [9]:
df_TD_piv['Bor_NAme'].loc['BioBurBiof']['mean']


Out[9]:
1

In [10]:
#Store region names in list
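#(after the pivot, each region contributes three columns: max, mean, min)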
numRegions = df_TD_piv.shape[1] / 3

idx = 0
sources = []
for num in range(0,numRegions):
    sources.append(df_TD_piv.columns[idx][0])
    idx = idx + 3
   
sources


Out[10]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']
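
A more direct way to build the same list is to read the first level of the pivoted
MultiIndex columns; a sketch, assuming df_TD_piv from the pivot above (order of
appearance is preserved, so GLO still has to be dropped afterwards):

#Region names = unique values of the first column level, in order of appearance
sources = df_TD_piv.columns.get_level_values(0).unique().tolist()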

In [11]:
#EXCLUDE GLO (GLOBAL TOTAL) COLUMN
sources = sources[:-1]

In [12]:
sources


Out[12]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan']

In [13]:
#Get target list
targets = df_TD_piv.index.tolist()
targets


Out[13]:
['Agriwast', 'BioBurBiof', 'Fossil', 'OtherNat', 'Wetlands']

In [14]:
nodes = sources + targets
nodes


Out[14]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'Agriwast',
 'BioBurBiof',
 'Fossil',
 'OtherNat',
 'Wetlands']

In [15]:
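#Write json file for Sankey, MEAN VALUES ONLY (links skipped where mean is -99 or 0)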
file = open('../data/Sankey_TD_2003-2012_10june2016_mean_noZeros.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    print source
    for target in targets:
        print target
        #print df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['mean']
        if value != -99:
            if value != 0:
                print value
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()


Bor_NAme
Agriwast
3
BioBurBiof
1
Fossil
2
OtherNat
2
Wetlands
13
contUSA
Agriwast
18
BioBurBiof
Fossil
12
OtherNat
3
Wetlands
9
Cent_NAme
Agriwast
5
BioBurBiof
1
Fossil
2
OtherNat
1
Wetlands
2
Trop_SAme
Agriwast
21
BioBurBiof
7
Fossil
4
OtherNat
9
Wetlands
44
Temp_SAme
Agriwast
6
BioBurBiof
Fossil
1
OtherNat
2
Wetlands
8
NAfr
Agriwast
12
BioBurBiof
5
Fossil
7
OtherNat
6
Wetlands
12
SAfr
Agriwast
7
BioBurBiof
6
Fossil
4
OtherNat
6
Wetlands
20
Russia
Agriwast
5
BioBurBiof
1
Fossil
17
OtherNat
2
Wetlands
13
Oceania
Agriwast
4
BioBurBiof
1
Fossil
1
OtherNat
3
Wetlands
3
Europe
Agriwast
15
BioBurBiof
1
Fossil
7
OtherNat
3
Wetlands
2
China
Agriwast
27
BioBurBiof
3
Fossil
18
OtherNat
6
Wetlands
5
India
Agriwast
25
BioBurBiof
2
Fossil
3
OtherNat
4
Wetlands
6
SE_Asia
Agriwast
24
BioBurBiof
6
Fossil
8
OtherNat
9
Wetlands
27
Temp_Eurasia_Japan
Agriwast
19
BioBurBiof
1
Fossil
20
OtherNat
4
Wetlands
3
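
The writer above strips the trailing comma with a negative end-relative seek plus
truncate, which works for a Python 2 text-mode file but would fail under Python 3. A
sketch of the same mean-value file built with the standard json module instead (same
filename and the same mean != -99 / mean != 0 filter as the cell above; only the
whitespace of the output differs):

import json

nodes_list = [{"name": node} for node in nodes]
links_list = []
for source in sources:
    for target in targets:
        value = df_TD_piv[source].loc[target]['mean']
        if value != -99 and value != 0:
            #OrderedDict (collections is already imported) keeps the key order in the output
            links_list.append(collections.OrderedDict(
                [("source", source), ("target", target), ("value", "%.2f" % float(value))]))

with open('../data/Sankey_TD_2003-2012_10june2016_mean_noZeros.json', 'w') as f:
    json.dump(collections.OrderedDict([("nodes", nodes_list), ("links", links_list)]),
              f, indent=1)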

In [16]:
#Write json file for Sankey, MIN VALUES ONLY

file = open('../data/Sankey_TD_2003-2012_10june2016_min.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    #print source
    for target in targets:
        #print target
        #print df_TD_piv[source].loc[target]['min']
        meanValue = df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['min']
        if meanValue != -99:
            if meanValue != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()

In [17]:
#Write json file for Sankey, MAX VALUES ONLY

file = open('../data/Sankey_TD_2003-2012_10june2016_max.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    #print source
    for target in targets:
        #print target
        #print df_TD_piv[source].loc[target]['max']
        meanValue = df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['max']
        if meanValue != -99:
            if meanValue != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()
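
The mean, min and max cells differ only in which statistic is written and in the output
filename. A sketch of one possible helper (write_sankey_json is a new name, not from the
original) that covers all three files and avoids the trailing-comma seek/truncate by
joining the entries instead; filters and filenames follow the cells above:

def write_sankey_json(stat, outfile):
    #Write one Sankey json file for the given statistic ('mean', 'min' or 'max');
    #links are skipped wherever the corresponding mean is -99 or 0
    entries = []
    for source in sources:
        for target in targets:
            meanValue = df_TD_piv[source].loc[target]['mean']
            if meanValue != -99 and meanValue != 0:
                value = df_TD_piv[source].loc[target][stat]
                entries.append('{"source": "%s", "target": "%s", "value": "%.2f"}'
                               % (source, target, float(value)))
    out = open(outfile, 'w')
    out.write('{\n"nodes": [\n')
    out.write(',\n'.join('{"name": "%s"}' % node for node in nodes))
    out.write('\n],\n"links": [\n')
    out.write(',\n'.join(entries))
    out.write('\n]\n}\n')
    out.close()

write_sankey_json('mean', '../data/Sankey_TD_2003-2012_10june2016_mean_noZeros.json')
write_sankey_json('min', '../data/Sankey_TD_2003-2012_10june2016_min.json')
write_sankey_json('max', '../data/Sankey_TD_2003-2012_10june2016_max.json')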

In [ ]: