notebook.community

Edit and run



In [36]:

    
#Script to reformat methane data Top-Down file for two-level Sankey
#In BU approach, Sources = methane sources, Targets = regions

#Output: json file formatted for Sankey diagram

#Created:       31.05.2016
#Last modified: 02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
#               03.06.2016 Add "Not affected" category; Exclude 0 and -99 values
#               06.06.2016 Output json files (Sankey format) with min and max values, respectively
#               07.06.2016 Min/max values output only when mean != -99 or != 0
#               09.06.2016 three levels
#               10.06.2016 New data files from Marielle



In [19]:

    
import numpy as np
import pandas as pd
import collections
import os
import xlrd



In [20]:

    
#Subcategories
#Pour Wetlands, il n'y a pas de sous catégories.
#Pour Other Natural, tu pourrais essayer avec :
#  Fresh water (122)
#  Geological source (52)
#  Wild animals (10)
#  Termites( 9)
#  Permafrost and hydrates (3)
#  Wild fires (1)

122+52+10+9+3+3









    Out[20]:





199



In [90]:

    
df_BU = pd.read_csv("../data/Sankey_BU_2003-2012_10june2016.txt", header=1, delim_whitespace=True)
df_BU









    Out[90]:






  
    
      
      proc
      Bor_NAme
      contUSA
      Cent_NAme
      Trop_SAme
      Temp_SAme
      NAfr
      SAfr
      Russia
      Oceania
      Europe
      China
      India
      SE_Asia
      Temp_Eurasia_Japan
      GLO
    
  
  
    
      Wetlands
      mean
      32
      13
      2
      42
      4
      8
      19
      14
      3
      4
      5
      6
      29
      3
      185
    
    
      Wetlands
      min
      15
      6
      1
      19
      1
      3
      15
      5
      1
      1
      1
      1
      18
      1
      153
    
    
      Wetlands
      max
      61
      23
      4
      59
      7
      16
      22
      26
      6
      7
      9
      13
      35
      6
      227
    
    
      OtherNat
      mean
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      199
    
    
      OtherNat
      min
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      104
    
    
      OtherNat
      max
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      297
    
    
      Agriwast
      mean
      3
      17
      5
      21
      6
      14
      6
      5
      5
      17
      30
      21
      22
      20
      195
    
    
      Agriwast
      min
      2
      15
      2
      18
      5
      12
      5
      5
      5
      15
      23
      16
      18
      17
      178
    
    
      Agriwast
      max
      3
      23
      6
      23
      6
      18
      7
      5
      5
      18
      36
      24
      25
      22
      206
    
    
      Fossil
      mean
      2
      11
      2
      8
      1
      9
      4
      20
      2
      6
      24
      3
      6
      22
      121
    
    
      Fossil
      min
      1
      9
      0
      3
      1
      7
      3
      18
      1
      3
      15
      2
      5
      20
      114
    
    
      Fossil
      max
      3
      16
      3
      19
      1
      14
      5
      26
      2
      8
      31
      4
      7
      26
      133
    
    
      BioBurBiof
      mean
      1
      1
      1
      3
      0
      4
      6
      2
      1
      1
      3
      2
      5
      1
      30
    
    
      BioBurBiof
      min
      0
      0
      0
      1
      0
      3
      4
      0
      0
      0
      2
      2
      4
      1
      27
    
    
      BioBurBiof
      max
      1
      2
      1
      6
      1
      5
      10
      4
      1
      1
      4
      3
      5
      2
      35



In [91]:

    
df_BU.rename(columns = {'proc':'stats'}, inplace = True)
df_BU.index.name = 'proc'
df_BU









    Out[91]:






  
    
      
      stats
      Bor_NAme
      contUSA
      Cent_NAme
      Trop_SAme
      Temp_SAme
      NAfr
      SAfr
      Russia
      Oceania
      Europe
      China
      India
      SE_Asia
      Temp_Eurasia_Japan
      GLO
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Wetlands
      mean
      32
      13
      2
      42
      4
      8
      19
      14
      3
      4
      5
      6
      29
      3
      185
    
    
      Wetlands
      min
      15
      6
      1
      19
      1
      3
      15
      5
      1
      1
      1
      1
      18
      1
      153
    
    
      Wetlands
      max
      61
      23
      4
      59
      7
      16
      22
      26
      6
      7
      9
      13
      35
      6
      227
    
    
      OtherNat
      mean
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      199
    
    
      OtherNat
      min
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      104
    
    
      OtherNat
      max
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      297
    
    
      Agriwast
      mean
      3
      17
      5
      21
      6
      14
      6
      5
      5
      17
      30
      21
      22
      20
      195
    
    
      Agriwast
      min
      2
      15
      2
      18
      5
      12
      5
      5
      5
      15
      23
      16
      18
      17
      178
    
    
      Agriwast
      max
      3
      23
      6
      23
      6
      18
      7
      5
      5
      18
      36
      24
      25
      22
      206
    
    
      Fossil
      mean
      2
      11
      2
      8
      1
      9
      4
      20
      2
      6
      24
      3
      6
      22
      121
    
    
      Fossil
      min
      1
      9
      0
      3
      1
      7
      3
      18
      1
      3
      15
      2
      5
      20
      114
    
    
      Fossil
      max
      3
      16
      3
      19
      1
      14
      5
      26
      2
      8
      31
      4
      7
      26
      133
    
    
      BioBurBiof
      mean
      1
      1
      1
      3
      0
      4
      6
      2
      1
      1
      3
      2
      5
      1
      30
    
    
      BioBurBiof
      min
      0
      0
      0
      1
      0
      3
      4
      0
      0
      0
      2
      2
      4
      1
      27
    
    
      BioBurBiof
      max
      1
      2
      1
      6
      1
      5
      10
      4
      1
      1
      4
      3
      5
      2
      35



In [92]:

    
#Add "Not affected" column and set values to 0
df_BU['Not affected'] = 0
df_BU









    Out[92]:






  
    
      
      stats
      Bor_NAme
      contUSA
      Cent_NAme
      Trop_SAme
      Temp_SAme
      NAfr
      SAfr
      Russia
      Oceania
      Europe
      China
      India
      SE_Asia
      Temp_Eurasia_Japan
      GLO
      Not affected
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Wetlands
      mean
      32
      13
      2
      42
      4
      8
      19
      14
      3
      4
      5
      6
      29
      3
      185
      0
    
    
      Wetlands
      min
      15
      6
      1
      19
      1
      3
      15
      5
      1
      1
      1
      1
      18
      1
      153
      0
    
    
      Wetlands
      max
      61
      23
      4
      59
      7
      16
      22
      26
      6
      7
      9
      13
      35
      6
      227
      0
    
    
      OtherNat
      mean
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      199
      0
    
    
      OtherNat
      min
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      104
      0
    
    
      OtherNat
      max
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      297
      0
    
    
      Agriwast
      mean
      3
      17
      5
      21
      6
      14
      6
      5
      5
      17
      30
      21
      22
      20
      195
      0
    
    
      Agriwast
      min
      2
      15
      2
      18
      5
      12
      5
      5
      5
      15
      23
      16
      18
      17
      178
      0
    
    
      Agriwast
      max
      3
      23
      6
      23
      6
      18
      7
      5
      5
      18
      36
      24
      25
      22
      206
      0
    
    
      Fossil
      mean
      2
      11
      2
      8
      1
      9
      4
      20
      2
      6
      24
      3
      6
      22
      121
      0
    
    
      Fossil
      min
      1
      9
      0
      3
      1
      7
      3
      18
      1
      3
      15
      2
      5
      20
      114
      0
    
    
      Fossil
      max
      3
      16
      3
      19
      1
      14
      5
      26
      2
      8
      31
      4
      7
      26
      133
      0
    
    
      BioBurBiof
      mean
      1
      1
      1
      3
      0
      4
      6
      2
      1
      1
      3
      2
      5
      1
      30
      0
    
    
      BioBurBiof
      min
      0
      0
      0
      1
      0
      3
      4
      0
      0
      0
      2
      2
      4
      1
      27
      0
    
    
      BioBurBiof
      max
      1
      2
      1
      6
      1
      5
      10
      4
      1
      1
      4
      3
      5
      2
      35
      0



In [93]:

    
#Copy GLO values for OtherNat into "Not affected" col
df_BU['Not affected']['OtherNat'] = df_BU['GLO']['OtherNat']









    



/usr/lib/python2.7/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app



In [94]:

    
df_BU









    Out[94]:






  
    
      
      stats
      Bor_NAme
      contUSA
      Cent_NAme
      Trop_SAme
      Temp_SAme
      NAfr
      SAfr
      Russia
      Oceania
      Europe
      China
      India
      SE_Asia
      Temp_Eurasia_Japan
      GLO
      Not affected
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Wetlands
      mean
      32
      13
      2
      42
      4
      8
      19
      14
      3
      4
      5
      6
      29
      3
      185
      0
    
    
      Wetlands
      min
      15
      6
      1
      19
      1
      3
      15
      5
      1
      1
      1
      1
      18
      1
      153
      0
    
    
      Wetlands
      max
      61
      23
      4
      59
      7
      16
      22
      26
      6
      7
      9
      13
      35
      6
      227
      0
    
    
      OtherNat
      mean
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      199
      199
    
    
      OtherNat
      min
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      104
      104
    
    
      OtherNat
      max
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      297
      297
    
    
      Agriwast
      mean
      3
      17
      5
      21
      6
      14
      6
      5
      5
      17
      30
      21
      22
      20
      195
      0
    
    
      Agriwast
      min
      2
      15
      2
      18
      5
      12
      5
      5
      5
      15
      23
      16
      18
      17
      178
      0
    
    
      Agriwast
      max
      3
      23
      6
      23
      6
      18
      7
      5
      5
      18
      36
      24
      25
      22
      206
      0
    
    
      Fossil
      mean
      2
      11
      2
      8
      1
      9
      4
      20
      2
      6
      24
      3
      6
      22
      121
      0
    
    
      Fossil
      min
      1
      9
      0
      3
      1
      7
      3
      18
      1
      3
      15
      2
      5
      20
      114
      0
    
    
      Fossil
      max
      3
      16
      3
      19
      1
      14
      5
      26
      2
      8
      31
      4
      7
      26
      133
      0
    
    
      BioBurBiof
      mean
      1
      1
      1
      3
      0
      4
      6
      2
      1
      1
      3
      2
      5
      1
      30
      0
    
    
      BioBurBiof
      min
      0
      0
      0
      1
      0
      3
      4
      0
      0
      0
      2
      2
      4
      1
      27
      0
    
    
      BioBurBiof
      max
      1
      2
      1
      6
      1
      5
      10
      4
      1
      1
      4
      3
      5
      2
      35
      0



In [95]:

    
#sub_df = pd.read_csv("../data/Sankey_BU_subcategories.csv", header=1, sep="\;")
sub_df = pd.read_csv("../data/Sankey_BU_subcategories_withFake.csv", header=1, sep="\;")
sub_df









    Out[95]:






  
    
      
      proc
      stats
      Fossil
      Agriwast
      BioBurBiof
      OtherNat
      Wetlands
    
  
  
    
      0
      Coal
      mean
      41
      0
      0
      0
      0
    
    
      1
      Coal
      min
      26
      0
      0
      0
      0
    
    
      2
      Coal
      max
      50
      0
      0
      0
      0
    
    
      3
      GasAndOilIndustry
      mean
      79
      0
      0
      0
      0
    
    
      4
      GasAndOilIndustry
      min
      69
      0
      0
      0
      0
    
    
      5
      GasAndOilIndustry
      max
      88
      0
      0
      0
      0
    
    
      6
      EntericFermentationAndManure
      mean
      0
      106
      0
      0
      0
    
    
      7
      EntericFermentationAndManure
      min
      0
      97
      0
      0
      0
    
    
      8
      EntericFermentationAndManure
      max
      0
      111
      0
      0
      0
    
    
      9
      LandfillsAndWaste
      mean
      0
      59
      0
      0
      0
    
    
      10
      LandfillsAndWaste
      min
      0
      52
      0
      0
      0
    
    
      11
      LandfillsAndWaste
      max
      0
      63
      0
      0
      0
    
    
      12
      Rice
      mean
      0
      30
      0
      0
      0
    
    
      13
      Rice
      min
      0
      24
      0
      0
      0
    
    
      14
      Rice
      max
      0
      36
      0
      0
      0
    
    
      15
      BiomassBurning
      mean
      0
      0
      18
      0
      0
    
    
      16
      BiomassBurning
      min
      0
      0
      15
      0
      0
    
    
      17
      BiomassBurning
      max
      0
      0
      21
      0
      0
    
    
      18
      BiofuelBurning
      mean
      0
      0
      12
      0
      0
    
    
      19
      BiofuelBurning
      min
      0
      0
      10
      0
      0
    
    
      20
      BiofuelBurning
      max
      0
      0
      14
      0
      0
    
    
      21
      Freshwater
      mean
      0
      0
      0
      122
      0
    
    
      22
      Freshwater
      min
      0
      0
      0
      60
      0
    
    
      23
      Freshwater
      max
      0
      0
      0
      180
      0
    
    
      24
      GeologicalSources
      mean
      0
      0
      0
      52
      0
    
    
      25
      GeologicalSources
      min
      0
      0
      0
      32
      0
    
    
      26
      GeologicalSources
      max
      0
      0
      0
      76
      0
    
    
      27
      WildAnimals
      mean
      0
      0
      0
      10
      0
    
    
      28
      WildAnimals
      min
      0
      0
      0
      5
      0
    
    
      29
      WildAnimals
      max
      0
      0
      0
      15
      0
    
    
      30
      Termites
      mean
      0
      0
      0
      9
      0
    
    
      31
      Termites
      min
      0
      0
      0
      3
      0
    
    
      32
      Termites
      max
      0
      0
      0
      15
      0
    
    
      33
      PermafrostAndHydrates
      mean
      0
      0
      0
      3
      0
    
    
      34
      PermafrostAndHydrates
      min
      0
      0
      0
      -888
      0
    
    
      35
      PermafrostAndHydrates
      max
      0
      0
      0
      -889
      0
    
    
      36
      Wildfires
      mean
      0
      0
      0
      3
      0
    
    
      37
      Wildfires
      min
      0
      0
      0
      1
      0
    
    
      38
      Wildfires
      max
      0
      0
      0
      5
      0
    
    
      39
      fakeSource
      mean
      0
      0
      0
      0
      100
    
    
      40
      fakeSource
      min
      0
      0
      0
      0
      1
    
    
      41
      fakeSource
      max
      0
      0
      0
      0
      10



In [96]:

    
sub_df.index = sub_df['proc']
sub_df = sub_df.drop(['proc'], axis=1)
sub_df.head(10)









    Out[96]:






  
    
      
      stats
      Fossil
      Agriwast
      BioBurBiof
      OtherNat
      Wetlands
    
    
      proc
      
      
      
      
      
      
    
  
  
    
      Coal
      mean
      41
      0
      0
      0
      0
    
    
      Coal
      min
      26
      0
      0
      0
      0
    
    
      Coal
      max
      50
      0
      0
      0
      0
    
    
      GasAndOilIndustry
      mean
      79
      0
      0
      0
      0
    
    
      GasAndOilIndustry
      min
      69
      0
      0
      0
      0
    
    
      GasAndOilIndustry
      max
      88
      0
      0
      0
      0
    
    
      EntericFermentationAndManure
      mean
      0
      106
      0
      0
      0
    
    
      EntericFermentationAndManure
      min
      0
      97
      0
      0
      0
    
    
      EntericFermentationAndManure
      max
      0
      111
      0
      0
      0
    
    
      LandfillsAndWaste
      mean
      0
      59
      0
      0
      0



In [97]:

    
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_BU_piv = df_BU.pivot(columns='stats', index=df_BU.index)
df_BU_piv









    Out[97]:






  
    
      
      Bor_NAme
      contUSA
      Cent_NAme
      Trop_SAme
      ...
      SE_Asia
      Temp_Eurasia_Japan
      GLO
      Not affected
    
    
      stats
      max
      mean
      min
      max
      mean
      min
      max
      mean
      min
      max
      ...
      min
      max
      mean
      min
      max
      mean
      min
      max
      mean
      min
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Agriwast
      3
      3
      2
      23
      17
      15
      6
      5
      2
      23
      ...
      18
      22
      20
      17
      206
      195
      178
      0
      0
      0
    
    
      BioBurBiof
      1
      1
      0
      2
      1
      0
      1
      1
      0
      6
      ...
      4
      2
      1
      1
      35
      30
      27
      0
      0
      0
    
    
      Fossil
      3
      2
      1
      16
      11
      9
      3
      2
      0
      19
      ...
      5
      26
      22
      20
      133
      121
      114
      0
      0
      0
    
    
      OtherNat
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      ...
      -99
      -99
      -99
      -99
      297
      199
      104
      297
      199
      104
    
    
      Wetlands
      61
      32
      15
      23
      13
      6
      4
      2
      1
      59
      ...
      18
      6
      3
      1
      227
      185
      153
      0
      0
      0
    
  

5 rows × 48 columns



In [98]:

    
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
sub_df_piv = sub_df.pivot(columns='stats', index=sub_df.index)
sub_df_piv









    Out[98]:






  
    
      
      Fossil
      Agriwast
      BioBurBiof
      OtherNat
      Wetlands
    
    
      stats
      max
      min
      mean
      max
      min
      mean
      max
      min
      mean
      max
      min
      mean
      max
      min
      mean
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      BiofuelBurning
      0
      0
      0
      0
      0
      0
      14
      10
      12
      0
      0
      0
      0
      0
      0
    
    
      BiomassBurning
      0
      0
      0
      0
      0
      0
      21
      15
      18
      0
      0
      0
      0
      0
      0
    
    
      Coal
      50
      26
      41
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      EntericFermentationAndManure
      0
      0
      0
      111
      97
      106
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      Freshwater
      0
      0
      0
      0
      0
      0
      0
      0
      0
      180
      60
      122
      0
      0
      0
    
    
      GasAndOilIndustry
      88
      69
      79
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      GeologicalSources
      0
      0
      0
      0
      0
      0
      0
      0
      0
      76
      32
      52
      0
      0
      0
    
    
      LandfillsAndWaste
      0
      0
      0
      63
      52
      59
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      PermafrostAndHydrates
      0
      0
      0
      0
      0
      0
      0
      0
      0
      -889
      -888
      3
      0
      0
      0
    
    
      Rice
      0
      0
      0
      36
      24
      30
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      Termites
      0
      0
      0
      0
      0
      0
      0
      0
      0
      15
      3
      9
      0
      0
      0
    
    
      WildAnimals
      0
      0
      0
      0
      0
      0
      0
      0
      0
      15
      5
      10
      0
      0
      0
    
    
      Wildfires
      0
      0
      0
      0
      0
      0
      0
      0
      0
      5
      1
      3
      0
      0
      0
    
    
      fakeSource
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      10
      1
      100



In [99]:

    
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #CentName
df_BU_piv.columns[9][0] #Trop_SAme









    Out[99]:





'Trop_SAme'



In [100]:

    
df_BU_piv['Bor_NAme']['mean']









    Out[100]:





proc
Agriwast       3
BioBurBiof     1
Fossil         2
OtherNat     -99
Wetlands      32
Name: mean, dtype: int64



In [101]:

    
df_BU_piv['Bor_NAme'].loc['Agriwast']









    Out[101]:





stats
max     3
mean    3
min     2
Name: Agriwast, dtype: int64



In [102]:

    
df_BU_piv['Bor_NAme'].loc['BioBurBiof']['mean']









    Out[102]:





1



In [103]:

    
#Create regions, categories and subcategories array

#regions
#Store region names in list
numRegions = df_BU_piv.shape[1] / 3

idx = 0
regions = []
for num in range(0,numRegions):    
    if df_BU_piv.columns[idx][0] != "GLO" : #exclude GLO column
      regions.append(df_BU_piv.columns[idx][0])
    idx = idx + 3

#categories    
categories = df_BU_piv.index.tolist()
 
#subcategories
subcategories = sub_df.index.tolist()
subcategories = list(set(subcategories)) #finds unique values
   
regions
categories
subcategories









    Out[103]:





['Termites',
 'Wildfires',
 'Freshwater',
 'WildAnimals',
 'EntericFermentationAndManure',
 'fakeSource',
 'LandfillsAndWaste',
 'Coal',
 'BiomassBurning',
 'GeologicalSources',
 'PermafrostAndHydrates',
 'Rice',
 'BiofuelBurning',
 'GasAndOilIndustry']



In [104]:

    
nodes = categories + regions + subcategories
nodes









    Out[104]:





['Agriwast',
 'BioBurBiof',
 'Fossil',
 'OtherNat',
 'Wetlands',
 'Bor_NAme',
 'contUSA',
 'Cent_NAme',
 'Trop_SAme',
 'Temp_SAme',
 'NAfr',
 'SAfr',
 'Russia',
 'Oceania',
 'Europe',
 'China',
 'India',
 'SE_Asia',
 'Temp_Eurasia_Japan',
 'Not affected',
 'Termites',
 'Wildfires',
 'Freshwater',
 'WildAnimals',
 'EntericFermentationAndManure',
 'fakeSource',
 'LandfillsAndWaste',
 'Coal',
 'BiomassBurning',
 'GeologicalSources',
 'PermafrostAndHydrates',
 'Rice',
 'BiofuelBurning',
 'GasAndOilIndustry']



In [105]:

    
#Replace -99 values with 0
#df_BU_piv = df_BU_piv.replace(-99, 1)
#df_BU_piv

#Replace -99 and 0 values with -1
#df_BU_piv = df_BU_piv.replace(-99, -1)
#df_BU_piv = df_BU_piv.replace(0, -1)
df_BU_piv









    Out[105]:






  
    
      
      Bor_NAme
      contUSA
      Cent_NAme
      Trop_SAme
      ...
      SE_Asia
      Temp_Eurasia_Japan
      GLO
      Not affected
    
    
      stats
      max
      mean
      min
      max
      mean
      min
      max
      mean
      min
      max
      ...
      min
      max
      mean
      min
      max
      mean
      min
      max
      mean
      min
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Agriwast
      3
      3
      2
      23
      17
      15
      6
      5
      2
      23
      ...
      18
      22
      20
      17
      206
      195
      178
      0
      0
      0
    
    
      BioBurBiof
      1
      1
      0
      2
      1
      0
      1
      1
      0
      6
      ...
      4
      2
      1
      1
      35
      30
      27
      0
      0
      0
    
    
      Fossil
      3
      2
      1
      16
      11
      9
      3
      2
      0
      19
      ...
      5
      26
      22
      20
      133
      121
      114
      0
      0
      0
    
    
      OtherNat
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      -99
      ...
      -99
      -99
      -99
      -99
      297
      199
      104
      297
      199
      104
    
    
      Wetlands
      61
      32
      15
      23
      13
      6
      4
      2
      1
      59
      ...
      18
      6
      3
      1
      227
      185
      153
      0
      0
      0
    
  

5 rows × 48 columns



In [106]:

    
sub_df_piv









    Out[106]:






  
    
      
      Fossil
      Agriwast
      BioBurBiof
      OtherNat
      Wetlands
    
    
      stats
      max
      min
      mean
      max
      min
      mean
      max
      min
      mean
      max
      min
      mean
      max
      min
      mean
    
    
      proc
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      BiofuelBurning
      0
      0
      0
      0
      0
      0
      14
      10
      12
      0
      0
      0
      0
      0
      0
    
    
      BiomassBurning
      0
      0
      0
      0
      0
      0
      21
      15
      18
      0
      0
      0
      0
      0
      0
    
    
      Coal
      50
      26
      41
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      EntericFermentationAndManure
      0
      0
      0
      111
      97
      106
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      Freshwater
      0
      0
      0
      0
      0
      0
      0
      0
      0
      180
      60
      122
      0
      0
      0
    
    
      GasAndOilIndustry
      88
      69
      79
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      GeologicalSources
      0
      0
      0
      0
      0
      0
      0
      0
      0
      76
      32
      52
      0
      0
      0
    
    
      LandfillsAndWaste
      0
      0
      0
      63
      52
      59
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      PermafrostAndHydrates
      0
      0
      0
      0
      0
      0
      0
      0
      0
      -889
      -888
      3
      0
      0
      0
    
    
      Rice
      0
      0
      0
      36
      24
      30
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      Termites
      0
      0
      0
      0
      0
      0
      0
      0
      0
      15
      3
      9
      0
      0
      0
    
    
      WildAnimals
      0
      0
      0
      0
      0
      0
      0
      0
      0
      15
      5
      10
      0
      0
      0
    
    
      Wildfires
      0
      0
      0
      0
      0
      0
      0
      0
      0
      5
      1
      3
      0
      0
      0
    
    
      fakeSource
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      10
      1
      100



In [107]:

    
#df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']
sub_df_piv['OtherNat'].loc['Freshwater'][2] #[0] = max, [1] = min, [2] = mean

sub_df_piv['Agriwast'].loc['Termites'][2] #[0] = max, [1] = min, [2] = mean









    Out[107]:





0



In [108]:

    
#Write json file for 3-level Sankey, MEAN VALUES ONLY
#GLO omitted

file = open('../data/Sankey_BU_2003-2012_10june2016_mean_3levels.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

#source-target pairs for categories -> regions
for category in categories:
    #print category
    for region in regions:
        #print region
        value = df_BU_piv[region].loc[category]['mean']     
        if value != -99:
            if value != 0:
                #print value
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))                
    
#source-target pairs for subcategories -> categories
for subcategory in subcategories:    
    for category in categories:    
        value2 = sub_df_piv[category].loc[subcategory][2] #[0] = max, [1] = min, [2] = mean
        if value2 != 0:
            file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()



In [109]:

    
#Write json file for 3-level Sankey, MIN VALUES ONLY
#GLO omitted

file = open('../data/Sankey_BU_2003-2012_10june2016_min_3levels.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

#source-target pairs for categories -> regions
for category in categories:
    for region in regions:
        meanValue = df_BU_piv[region].loc[category]['mean']
        value = df_BU_piv[region].loc[category]['min']
        if meanValue != -99:
            if meanValue != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))
                
#source-target pairs for subcategories -> categories
for subcategory in subcategories:
    for category in categories:    
        value2 = sub_df_piv[category].loc[subcategory][1] #[0] = max, [1] = min, [2] = mean
        if value2 != 0:
            file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()



In [110]:

    
#Write json file for Sankey, MAX VALUES ONLY
#GLO omitted

file = open('../data/Sankey_BU_2003-2012_10june2016_max_3levels.json', 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

#source-target pairs for categories -> regions
for category in categories:
    for region in regions:
        meanValue = df_BU_piv[region].loc[category]['mean']
        value = df_BU_piv[region].loc[category]['max']
        if meanValue != -99:
            if meanValue != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))
                
#source-target pairs for subcategories -> categories
for subcategory in subcategories:
    for category in categories:    
        value2 = sub_df_piv[category].loc[subcategory][0] #[0] = max, [1] = min, [2] = mean
        if value2 != 0:
            file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))

# remove last comma
file.seek(-2, os.SEEK_END)
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()



In [ ]:



In [ ]:



In [40]:



In [ ]:

	proc	Bor_NAme	contUSA	Cent_NAme	Trop_SAme	Temp_SAme	NAfr	SAfr	Russia	Oceania	Europe	China	India	SE_Asia	Temp_Eurasia_Japan	GLO
Wetlands	mean	32	13	2	42	4	8	19	14	3	4	5	6	29	3	185
Wetlands	min	15	6	1	19	1	3	15	5	1	1	1	1	18	1	153
Wetlands	max	61	23	4	59	7	16	22	26	6	7	9	13	35	6	227
OtherNat	mean	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	199
OtherNat	min	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	104
OtherNat	max	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	297
Agriwast	mean	3	17	5	21	6	14	6	5	5	17	30	21	22	20	195
Agriwast	min	2	15	2	18	5	12	5	5	5	15	23	16	18	17	178
Agriwast	max	3	23	6	23	6	18	7	5	5	18	36	24	25	22	206
Fossil	mean	2	11	2	8	1	9	4	20	2	6	24	3	6	22	121
Fossil	min	1	9	0	3	1	7	3	18	1	3	15	2	5	20	114
Fossil	max	3	16	3	19	1	14	5	26	2	8	31	4	7	26	133
BioBurBiof	mean	1	1	1	3	0	4	6	2	1	1	3	2	5	1	30
BioBurBiof	min	0	0	0	1	0	3	4	0	0	0	2	2	4	1	27
BioBurBiof	max	1	2	1	6	1	5	10	4	1	1	4	3	5	2	35

	stats	Bor_NAme	contUSA	Cent_NAme	Trop_SAme	Temp_SAme	NAfr	SAfr	Russia	Oceania	Europe	China	India	SE_Asia	Temp_Eurasia_Japan	GLO
proc
Wetlands	mean	32	13	2	42	4	8	19	14	3	4	5	6	29	3	185
Wetlands	min	15	6	1	19	1	3	15	5	1	1	1	1	18	1	153
Wetlands	max	61	23	4	59	7	16	22	26	6	7	9	13	35	6	227
OtherNat	mean	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	199
OtherNat	min	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	104
OtherNat	max	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	297
Agriwast	mean	3	17	5	21	6	14	6	5	5	17	30	21	22	20	195
Agriwast	min	2	15	2	18	5	12	5	5	5	15	23	16	18	17	178
Agriwast	max	3	23	6	23	6	18	7	5	5	18	36	24	25	22	206
Fossil	mean	2	11	2	8	1	9	4	20	2	6	24	3	6	22	121
Fossil	min	1	9	0	3	1	7	3	18	1	3	15	2	5	20	114
Fossil	max	3	16	3	19	1	14	5	26	2	8	31	4	7	26	133
BioBurBiof	mean	1	1	1	3	0	4	6	2	1	1	3	2	5	1	30
BioBurBiof	min	0	0	0	1	0	3	4	0	0	0	2	2	4	1	27
BioBurBiof	max	1	2	1	6	1	5	10	4	1	1	4	3	5	2	35

	proc	stats	Fossil	Agriwast	BioBurBiof	OtherNat	Wetlands
0	Coal	mean	41	0	0	0	0
1	Coal	min	26	0	0	0	0
2	Coal	max	50	0	0	0	0
3	GasAndOilIndustry	mean	79	0	0	0	0
4	GasAndOilIndustry	min	69	0	0	0	0
5	GasAndOilIndustry	max	88	0	0	0	0
6	EntericFermentationAndManure	mean	0	106	0	0	0
7	EntericFermentationAndManure	min	0	97	0	0	0
8	EntericFermentationAndManure	max	0	111	0	0	0
9	LandfillsAndWaste	mean	0	59	0	0	0
10	LandfillsAndWaste	min	0	52	0	0	0
11	LandfillsAndWaste	max	0	63	0	0	0
12	Rice	mean	0	30	0	0	0
13	Rice	min	0	24	0	0	0
14	Rice	max	0	36	0	0	0
15	BiomassBurning	mean	0	0	18	0	0
16	BiomassBurning	min	0	0	15	0	0
17	BiomassBurning	max	0	0	21	0	0
18	BiofuelBurning	mean	0	0	12	0	0
19	BiofuelBurning	min	0	0	10	0	0
20	BiofuelBurning	max	0	0	14	0	0
21	Freshwater	mean	0	0	0	122	0
22	Freshwater	min	0	0	0	60	0
23	Freshwater	max	0	0	0	180	0
24	GeologicalSources	mean	0	0	0	52	0
25	GeologicalSources	min	0	0	0	32	0
26	GeologicalSources	max	0	0	0	76	0
27	WildAnimals	mean	0	0	0	10	0
28	WildAnimals	min	0	0	0	5	0
29	WildAnimals	max	0	0	0	15	0
30	Termites	mean	0	0	0	9	0
31	Termites	min	0	0	0	3	0
32	Termites	max	0	0	0	15	0
33	PermafrostAndHydrates	mean	0	0	0	3	0
34	PermafrostAndHydrates	min	0	0	0	-888	0
35	PermafrostAndHydrates	max	0	0	0	-889	0
36	Wildfires	mean	0	0	0	3	0
37	Wildfires	min	0	0	0	1	0
38	Wildfires	max	0	0	0	5	0
39	fakeSource	mean	0	0	0	0	100
40	fakeSource	min	0	0	0	0	1
41	fakeSource	max	0	0	0	0	10

	Bor_NAme			contUSA			Cent_NAme			Trop_SAme	...	SE_Asia	Temp_Eurasia_Japan			GLO			Not affected
stats	max	mean	min	max	mean	min	max	mean	min	max	...	min	max	mean	min	max	mean	min	max	mean	min
proc
Agriwast	3	3	2	23	17	15	6	5	2	23	...	18	22	20	17	206	195	178	0	0	0
BioBurBiof	1	1	0	2	1	0	1	1	0	6	...	4	2	1	1	35	30	27	0	0	0
Fossil	3	2	1	16	11	9	3	2	0	19	...	5	26	22	20	133	121	114	0	0	0
OtherNat	-99	-99	-99	-99	-99	-99	-99	-99	-99	-99	...	-99	-99	-99	-99	297	199	104	297	199	104
Wetlands	61	32	15	23	13	6	4	2	1	59	...	18	6	3	1	227	185	153	0	0	0