In [36]:
#Script to reformat methane Bottom-Up (BU) data file for two-level Sankey
#In BU approach, Sources = methane sources, Targets = regions
#Output: json file formatted for Sankey diagram
#Created: 31.05.2016
#Last modified: 02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
# 03.06.2016 Add "Not affected" category; Exclude 0 and -99 values
# 06.06.2016 Output json files (Sankey format) with min and max values, respectively
# 07.06.2016 Min/max values output only when mean != -99 or != 0
# 09.06.2016 three levels
In [1]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd
In [3]:
#Subcategories
#Pour Wetlands, il n'y a pas de sous catégories.
#Pour Other Natural, tu pourrais essayer avec :
# Fresh water (122)
# Geological source (52)
# Wild animals (10)
# Termites( 9)
# Permafrost and hydrates (3)
# Wild fires (1)
122+52+10+9+3+3
Out[3]:
In [4]:
df_BU = pd.read_csv("../data/Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
df_BU
Out[4]:
In [5]:
df_BU.rename(columns = {'proc':'stats'}, inplace = True)
df_BU.index.name = 'proc'
df_BU
Out[5]:
In [6]:
#Add "Not affected" column and set values to 0
df_BU['Not affected'] = 0
df_BU
Out[6]:
In [7]:
#Copy GLO values for OtherNat into "Not affected" col
df_BU['Not affected']['OtherNat'] = df_BU['GLO']['OtherNat']
In [8]:
df_BU
Out[8]:
In [159]:
#sub_df = pd.read_csv("../data/Sankey_BU_subcategories.csv", header=1, sep="\;")
sub_df = pd.read_csv("../data/Sankey_BU_subcategories_withFake.csv", header=1, sep="\;")
sub_df
Out[159]:
In [160]:
sub_df['proc']
Out[160]:
In [161]:
sub_df.index = sub_df['proc']
sub_df = sub_df.drop(['proc'], axis=1)
sub_df.head(10)
Out[161]:
In [162]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_BU_piv = df_BU.pivot(columns='stats', index=df_BU.index)
df_BU_piv
Out[162]:
In [163]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
sub_df_piv = sub_df.pivot(columns='stats', index=sub_df.index)
sub_df_piv
Out[163]:
In [10]:
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #CentName
df_BU_piv.columns[9][0] #Trop_SAme
Out[10]:
In [11]:
df_BU_piv['Bor_NAme']['mean']
Out[11]:
In [12]:
df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']
Out[12]:
In [13]:
df_BU_piv['Bor_NAme'].loc['BioBurBiof']['mean']
Out[13]:
In [165]:
#Create regions, categories and subcategories array
#regions
#Store region names in list
numRegions = df_BU_piv.shape[1] / 3
idx = 0
regions = []
for num in range(0,numRegions):
if df_BU_piv.columns[idx][0] != "GLO" : #exclude GLO column
regions.append(df_BU_piv.columns[idx][0])
idx = idx + 3
#categories
categories = df_BU_piv.index.tolist()
#subcategories
subcategories = sub_df.index.tolist()
subcategories = list(set(subcategories)) #finds unique values
regions
categories
subcategories
Out[165]:
In [110]:
#Create sources array
#sources = df_BU_piv.index.tolist()
#sources
Out[110]:
In [166]:
sub_df_piv.index.tolist()
Out[166]:
In [167]:
nodes = categories + regions + subcategories
nodes
Out[167]:
In [168]:
sources = categories + subcategories
sources
Out[168]:
In [169]:
targets = regions + categories
targets
Out[169]:
In [170]:
#Replace -99 values with 0
#df_BU_piv = df_BU_piv.replace(-99, 1)
#df_BU_piv
#Replace -99 and 0 values with -1
#df_BU_piv = df_BU_piv.replace(-99, -1)
#df_BU_piv = df_BU_piv.replace(0, -1)
df_BU_piv
Out[170]:
In [190]:
sub_df_piv
Out[190]:
In [206]:
targets #regions + categories
#df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']
sub_df_piv['OtherNat'].loc['Freshwater'][2] #[0] = max, [1] = min, [2] = mean
sub_df_piv['Agriwast'].loc['Termites'][2] #[0] = max, [1] = min, [2] = mean
Out[206]:
In [216]:
#Write json file for 3-level Sankey, MEAN VALUES ONLY
#GLO omitted
file = open('../data/Sankey_BU_2003-2012_25MAy2016_mean_3levels.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
#source-target pairs for categories -> regions
for category in categories:
#print category
for region in regions:
#print region
value = df_BU_piv[region].loc[category]['mean']
if value != -99:
if value != 0:
#print value
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))
#source-target pairs for subcategories -> categories
for subcategory in subcategories:
for category in categories:
value2 = sub_df_piv[category].loc[subcategory][2] #[0] = max, [1] = min, [2] = mean
if value2 != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [50]:
#Write json file for 3-level Sankey, MIN VALUES ONLY
#GLO omitted
file = open('../data/Sankey_BU_2003-2012_25MAy2016_min_3levels.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
#source-target pairs for categories -> regions
for category in categories:
for region in regions:
meanValue = df_BU_piv[region].loc[category]['mean']
value = df_BU_piv[region].loc[category]['min']
if meanValue != -99:
if meanValue != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [51]:
#Write json file for Sankey, MAX VALUES ONLY
#GLO omitted
file = open('../data/Sankey_BU_2003-2012_25MAy2016_max_3levels.json', 'w')
file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')
file.write('"links": [\n')
for source in sources:
#print source
for target in targets:
#print target
#print df_BU_piv[target].loc[source]['max']
meanValue = df_BU_piv[target].loc[source]['mean']
value = df_BU_piv[target].loc[source]['max']
if meanValue != -99:
if meanValue != 0:
file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')
file.close()
In [20]:
#Write tsv file with min/max values
#file_name = "../data/statsBU.tsv"
#df_BU_piv.to_csv(file_name, sep='\t', encoding='utf-8')
#df_BU_piv
In [39]:
#junk = pd.read_csv("../data/Sankey_BU_2003-2012_25MAy2016.txt", header=1, delim_whitespace=True)
#junk.rename(columns = {'proc':'stats'}, inplace = True)
In [40]:
#Write tsv file with min/max values
#file_name = "../data/junk.tsv"
#junk.to_csv(file_name, sep='\t', encoding='utf-8')
In [ ]: