In [1]:
#Script to reformat the methane Top-Down (TD) data file for a two-level Sankey diagram
#In the TD approach, Sankey sources = regions, Sankey targets = methane source categories

#Output: JSON files formatted for a Sankey diagram (mean, min, and max variants)

#Created:       30.05.2016
#Last modified: 31.05.2016 Read in orig file (no manual reformatting necessary)
#               02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
#               06.06.2016 Output json files (Sankey format) with min and max values, respectively
#               07.06.2016 Min/max values output only when mean != -99 and mean != 0
#               10.06.2016 New data files from Marielle
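#
#For reference, the write cells below emit JSON of this shape (example link uses
#the Bor_NAme -> AgriWaste mean of 3, formatted with "%.2f"):
#{
#  "nodes": [ {"name": "Bor_NAme"}, ..., {"name": "Wetlands"} ],
#  "links": [ {"source": "Bor_NAme", "target": "AgriWaste", "value": "3.00"}, ... ]
#}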

In [2]:
import numpy as np
import pandas as pd
import collections
import os

In [3]:
df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_sept2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_10june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/Sankey_TD_2003-2012_june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/myeditsSankey_TD_2003-2012_june2016.txt", header=1, delim_whitespace=True)
#df_TD = pd.read_csv("../data/current/mySankey_TD_2003-2012_june2016.txt", header=1, sep="\;")
df_TD


Out[3]:
proc Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
Wetlands mean 13 9 2 44 8 12 20 13 3 2 5 6 27 3 167
Wetlands min 7 6 1 30 4 8 11 10 1 1 3 0 8 1 127
Wetlands max 21 13 3 61 10 14 29 16 2 4 7 10 41 5 202
OtherNatural mean 2 3 1 9 2 6 6 2 3 3 6 4 9 4 64
OtherNatural min 0 1 0 5 1 3 4 1 1 0 1 1 2 1 21
OtherNatural max 4 9 2 22 2 11 11 4 3 2 16 12 26 13 132
AgriWaste mean 3 18 5 21 6 12 7 5 4 15 27 25 24 19 188
AgriWaste min 2 16 2 19 5 11 7 3 3 9 16 14 12 9 115
AgriWaste max 4 23 8 27 8 15 9 7 5 19 37 43 32 24 243
Fossil mean 2 12 2 4 1 7 4 17 1 7 18 3 8 20 105
Fossil min 1 7 0 1 0 3 2 11 0 5 9 1 5 14 77
Fossil max 4 18 3 8 1 8 5 24 2 9 30 4 14 27 133
BioBurBiof mean 1 0 1 7 0 5 6 1 1 1 3 2 6 1 34
BioBurBiof min 0 0 0 3 0 2 4 1 0 0 0 0 3 0 15
BioBurBiof max 1 1 1 15 1 6 9 2 1 1 4 3 8 1 53
SumSources mean 20 41 11 84 17 42 44 38 11 28 58 39 74 46 558
SumSources min 13 34 5 65 12 36 38 31 7 21 51 28 55 38 540
SumSources max 27 49 15 101 27 55 53 45 19 34 72 46 84 54 568
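
Note: header=1 takes the file's second line as the column names (the first line is skipped), and delim_whitespace=True splits on any run of whitespace. Recent pandas versions deprecate delim_whitespace in favour of a regex separator; an equivalent call, should this be rerun on a current install, would be:

df_TD = pd.read_csv("../data/Sankey_TD_2003-2012_sept2016.txt", header=1, sep=r"\s+")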

In [4]:
df_TD.rename(columns = {'proc':'stats'}, inplace = True)
df_TD.index.name = 'proc'
df_TD


Out[4]:
stats Bor_NAme contUSA Cent_NAme Trop_SAme Temp_SAme NAfr SAfr Russia Oceania Europe China India SE_Asia Temp_Eurasia_Japan GLO
proc
Wetlands mean 13 9 2 44 8 12 20 13 3 2 5 6 27 3 167
Wetlands min 7 6 1 30 4 8 11 10 1 1 3 0 8 1 127
Wetlands max 21 13 3 61 10 14 29 16 2 4 7 10 41 5 202
OtherNatural mean 2 3 1 9 2 6 6 2 3 3 6 4 9 4 64
OtherNatural min 0 1 0 5 1 3 4 1 1 0 1 1 2 1 21
OtherNatural max 4 9 2 22 2 11 11 4 3 2 16 12 26 13 132
AgriWaste mean 3 18 5 21 6 12 7 5 4 15 27 25 24 19 188
AgriWaste min 2 16 2 19 5 11 7 3 3 9 16 14 12 9 115
AgriWaste max 4 23 8 27 8 15 9 7 5 19 37 43 32 24 243
Fossil mean 2 12 2 4 1 7 4 17 1 7 18 3 8 20 105
Fossil min 1 7 0 1 0 3 2 11 0 5 9 1 5 14 77
Fossil max 4 18 3 8 1 8 5 24 2 9 30 4 14 27 133
BioBurBiof mean 1 0 1 7 0 5 6 1 1 1 3 2 6 1 34
BioBurBiof min 0 0 0 3 0 2 4 1 0 0 0 0 3 0 15
BioBurBiof max 1 1 1 15 1 6 9 2 1 1 4 3 8 1 53
SumSources mean 20 41 11 84 17 42 44 38 11 28 58 39 74 46 558
SumSources min 13 34 5 65 12 36 38 31 7 21 51 28 55 38 540
SumSources max 27 49 15 101 27 55 53 45 19 34 72 46 84 54 568

In [5]:
#Pivot so that the "stats" values (mean, min, max) become three sub-columns under each region
#and "proc" becomes the index
df_TD_piv = df_TD.pivot(columns='stats', index=df_TD.index)
df_TD_piv


Out[5]:
Bor_NAme contUSA Cent_NAme Trop_SAme ... India SE_Asia Temp_Eurasia_Japan GLO
stats max mean min max mean min max mean min max ... min max mean min max mean min max mean min
proc
AgriWaste 4 3 2 23 18 16 8 5 2 27 ... 14 32 24 12 24 19 9 243 188 115
BioBurBiof 1 1 0 1 0 0 1 1 0 15 ... 0 8 6 3 1 1 0 53 34 15
Fossil 4 2 1 18 12 7 3 2 0 8 ... 1 14 8 5 27 20 14 133 105 77
OtherNatural 4 2 0 9 3 1 2 1 0 22 ... 1 26 9 2 13 4 1 132 64 21
SumSources 27 20 13 49 41 34 15 11 5 101 ... 28 84 74 55 54 46 38 568 558 540
Wetlands 21 13 7 13 9 6 3 2 1 61 ... 0 41 27 8 5 3 1 202 167 127

6 rows × 45 columns
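
The pivot leaves a two-level column MultiIndex of (region, stat). A whole statistic can be pulled across all regions with a cross-section; a minimal sketch, not executed in this notebook:

#All mean values at once: rows = processes, columns = regions
df_TD_means = df_TD_piv.xs('mean', axis=1, level='stats')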


In [6]:
df_TD_piv.columns[0][0]


Out[6]:
'Bor_NAme'

In [7]:
df_TD_piv.columns[0][0] #Bor_NAme
df_TD_piv.columns[3][0] #contUSA
df_TD_piv.columns[6][0] #Cent_NAme
df_TD_piv.columns[9][0] #Trop_SAme


Out[7]:
'Trop_SAme'

In [8]:
df_TD_piv['Bor_NAme']['mean']


Out[8]:
proc
AgriWaste        3
BioBurBiof       1
Fossil           2
OtherNatural     2
SumSources      20
Wetlands        13
Name: mean, dtype: int64

In [11]:
df_TD_piv['Bor_NAme'].loc['AgriWaste']['mean']


Out[11]:
3

In [12]:
df_TD_piv['Bor_NAme'].loc['BioBurBiof']['mean']


Out[12]:
1
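
The chained lookups above work, but a single .loc call with a (region, stat) column tuple does the same in one step; an equivalent sketch:

df_TD_piv.loc['AgriWaste', ('Bor_NAme', 'mean')]   #3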

In [13]:
#Store region names (level-0 column labels) in a list
numRegions = df_TD_piv.shape[1] // 3   #three stats (mean/min/max) per region

idx = 0
sources = []
for num in range(numRegions):
    sources.append(df_TD_piv.columns[idx][0])   #level-0 label of every third column
    idx += 3
   
sources


Out[13]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'GLO']
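
The same list can be read straight off the first level of the column index, avoiding the stride-of-3 loop; an equivalent sketch (GLO still included at this point):

#Level-0 column labels, de-duplicated in order of appearance
sources = df_TD_piv.columns.get_level_values(0).unique().tolist()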

In [14]:
#EXCLUDE the GLO (global total) COLUMN
sources = sources[:-1]   #GLO is the last entry

In [15]:
sources


Out[15]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan']

In [22]:
#Get target list
targets_orig = df_TD_piv.index.tolist()
targets_orig


Out[22]:
['AgriWaste', 'BioBurBiof', 'Fossil', 'OtherNatural', 'SumSources', 'Wetlands']

In [34]:
#EXCLUDE the SumSources ROW (per-region totals)
targets = [t for t in targets_orig if t != 'SumSources']

targets


Out[34]:
['AgriWaste', 'BioBurBiof', 'Fossil', 'OtherNatural', 'Wetlands']

In [35]:
nodes = sources + targets
nodes


Out[35]:
['Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'AgriWaste',
 'BioBurBiof',
 'Fossil',
 'OtherNatural',
 'Wetlands']
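
A quick sanity check on the combined list; 14 regions plus 5 source categories should give 19 nodes:

len(nodes)   #19 = 14 regions + 5 source categories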

In [36]:
#Write json file for Sankey, MEAN VALUES ONLY (skip links where mean is -99 or 0)

file = open('../data/Sankey_TD_2003-2012_16sept2016_mean_noZeros.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    print source
    for target in targets:
        print target
        #print df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['mean']
        if value != -99 and value != 0:   #-99 flags missing data
            print value
            file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()


Bor_NAme
AgriWaste
3
BioBurBiof
1
Fossil
2
OtherNatural
2
Wetlands
13
contUSA
AgriWaste
18
BioBurBiof
Fossil
12
OtherNatural
3
Wetlands
9
Cent_NAme
AgriWaste
5
BioBurBiof
1
Fossil
2
OtherNatural
1
Wetlands
2
Trop_SAme
AgriWaste
21
BioBurBiof
7
Fossil
4
OtherNatural
9
Wetlands
44
Temp_SAme
AgriWaste
6
BioBurBiof
Fossil
1
OtherNatural
2
Wetlands
8
NAfr
AgriWaste
12
BioBurBiof
5
Fossil
7
OtherNatural
6
Wetlands
12
SAfr
AgriWaste
7
BioBurBiof
6
Fossil
4
OtherNatural
6
Wetlands
20
Russia
AgriWaste
5
BioBurBiof
1
Fossil
17
OtherNatural
2
Wetlands
13
Oceania
AgriWaste
4
BioBurBiof
1
Fossil
1
OtherNatural
3
Wetlands
3
Europe
AgriWaste
15
BioBurBiof
1
Fossil
7
OtherNatural
3
Wetlands
2
China
AgriWaste
27
BioBurBiof
3
Fossil
18
OtherNatural
6
Wetlands
5
India
AgriWaste
25
BioBurBiof
2
Fossil
3
OtherNatural
4
Wetlands
6
SE_Asia
AgriWaste
24
BioBurBiof
6
Fossil
8
OtherNatural
9
Wetlands
27
Temp_Eurasia_Japan
AgriWaste
19
BioBurBiof
1
Fossil
20
OtherNatural
4
Wetlands
3
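
Hand-assembling the JSON and seeking back two bytes to strip the trailing comma relies on '\n' staying a single byte in the written file. A sturdier equivalent for this cell, sketched with the standard json module (same filter, same output path):

import json

links = []
for source in sources:
    for target in targets:
        value = df_TD_piv[source].loc[target]['mean']
        if value != -99 and value != 0:   #-99 flags missing data
            links.append({"source": source, "target": target,
                          "value": "%.2f" % float(value)})

with open('../data/Sankey_TD_2003-2012_16sept2016_mean_noZeros.json', 'w') as f:
    json.dump({"nodes": [{"name": n} for n in nodes], "links": links}, f, indent=1)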

In [37]:
#Write json file for Sankey, MIN VALUES ONLY

file = open('../data/Sankey_TD_2003-2012_16sept2016_min.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    #print source
    for target in targets:
        #print target
        #print df_TD_piv[source].loc[target]['min']
        meanValue = df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['min']
        if meanValue != -99 and meanValue != 0:
            file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()

In [38]:
#Write json file for Sankey, MAX VALUES ONLY

file = open('../data/Sankey_TD_2003-2012_16sept2016_max.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    #print source
    for target in targets:
        #print target
        #print df_TD_piv[source].loc[target]['max']
        meanValue = df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['max']
        if meanValue != -99 and meanValue != 0:
            file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()
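
The three write cells differ only in the statistic taken from the pivot table and the output filename, so with the json-based sketch above they collapse into one helper (same filters and filenames assumed):

def write_sankey(stat, path):
    #Keep a link only when its mean is valid (not -99) and non-zero,
    #then write the chosen statistic (mean/min/max) as the link value
    links = [{"source": s, "target": t,
              "value": "%.2f" % float(df_TD_piv[s].loc[t][stat])}
             for s in sources for t in targets
             if df_TD_piv[s].loc[t]['mean'] not in (-99, 0)]
    with open(path, 'w') as f:
        json.dump({"nodes": [{"name": n} for n in nodes], "links": links}, f, indent=1)

for stat, suffix in [('mean', 'mean_noZeros'), ('min', 'min'), ('max', 'max')]:
    write_sankey(stat, '../data/Sankey_TD_2003-2012_16sept2016_%s.json' % suffix)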

In [ ]: