notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np
import json



In [2]:

    
xls = pd.ExcelFile('../CDPdata/Investor CDP 2014_Public Data.xlsx')



In [4]:

    
sheet_names = pd.Series(xls.sheet_names)



In [5]:

    
sheet_names[30:40]









    Out[5]:





30    CC7. Emissions Methodology
31                         CC7.1
32                         CC7.2
33                         CC7.3
34                         CC7.4
35           CC8. Emissions Data
36                        CC8.4a
37                         CC8.5
38                        CC8.6a
39                        CC8.6b
dtype: object



In [3]:

    
p = xls.parse(43)



In [5]:

    
# pickle some data?
p.to_pickle('../CDPdata/sheet43_2014.pkl')



In [3]:

    
p = pd.read_pickle('../CDPdata/sheet43_2014.pkl')



In [4]:

    
pcols = p.columns.values
p[pcols[14:25]].head()









    Out[4]:






  
    
      
      Reporting Period
From
      Reporting Period
To
      CC9.1a. Please break down your total gross global Scope 1 emissions by country/region

CC9.1a. Country/Region
      CC9.1a. Scope 1 metric tonnes CO2e  
    
  
  
    
      0
       1 Jan 2013
       31 Dec 2013
                    Argentina
        34591
    
    
      1
       1 Jan 2013
       31 Dec 2013
                      Belgium
        25267
    
    
      2
       1 Jan 2013
       31 Dec 2013
       Bosnia and Herzegovina
          268
    
    
      3
       1 Jan 2013
       31 Dec 2013
                       Brazil
        80239
    
    
      4
       1 Jan 2013
       31 Dec 2013
                        China
       422748



In [7]:

    
# Emissions are not relevant, No emissions from this source, Emissions excluded due to a recent acquisition 
p[pcols[16]].value_counts()
# excuses_s12 = p[["Organisation"]+pcols[17:19].tolist()]



In [20]:

    
excuses_s12.to_pickle("../CDPdata/excuses_s12.pkl")

get emissions data



In [4]:

    
import ghgmosh as gm



In [803]:

    
reload(gm)









    Out[803]:





<module 'ghgmosh' from 'ghgmosh.py'>

get scope1 and scope2 totals by country and sector



In [689]:

    
pscope1 = gm.get_scope1or2(p, 1, 2010)
# scope1totals = gm.get_totals(pscope1, "Country", "GICS Sector", 20)



In [440]:

    
pscope2 = pscope2[1:]
# pscope2.head()



In [670]:

    
# get scope2 totals by country and sector
pscope2 = gm.get_scope1or2(p, 2, 2010)
# scope2totals = gm.get_totals(pscope2, "Country", "GICS Sector", 20)



In [108]:

    
# pickle some data
scope1totals.to_pickle('mydata/2014scope1totals.pkl')
scope2totals.to_pickle('mydata/2014scope2totals.pkl')



In [176]:

    
scope1total = pd.read_pickle('mydata/2013scope1totals.pkl')
scope2total = pd.read_pickle('mydata/2013scope2totals.pkl')



In [539]:

    
scope2total.head()









    Out[539]:






  
    
      
      
      Scope 2
    
    
      Country
      GICS Sector
      
    
  
  
    
      Australia
      Consumer Discretionary
       0.294324
    
    
      Consumer Staples
       5.398313
    
    
      Energy
       0.596422
    
    
      Financials
       2.022157
    
    
      Health Care
       0.148036



In [560]:

    
s1data = gm.write_json(scope1total, "scope1countrysector.json")
s2data = gm.write_json(scope2total, "scope2countrysector.json")

get scope3 emissions



In [671]:

    
p2 = xls.parse(49)



In [3]:

    
# p2.to_pickle('../CDPdata/sheet49_2010.pkl')
p2 = pd.read_pickle("../CDPdata/sheet68_2014.pkl")



In [30]:

    
catgroups = p2.groupby([p2.columns[gm.scopecols[2014][3]["cat"]],'GICS Sector'])
cats = catgroups.groups.keys()
# catgroups.head()



In [50]:

    
cg_sums = catgroups.sum()
cg_sums.reset_index(inplace=True)
cg_sums = cg_sums.rename( columns = { cg_sums.columns[0]: "Scope3Cat", "GICS Sector": "Sector",
                                    cg_sums.columns[4]: "CO2eTons" } )
cg_sums = cg_sums[["Scope3Cat", "Sector", "CO2eTons"]]



In [87]:

    
categories = list(set(cg_sums.Scope3Cat.values.tolist()))
sectors = list(set(cg_sums.Sector.values.tolist()))
# categories
cgs = cg_sums.set_index(['Scope3Cat', 'Sector'])



In [55]:

    
from collections import OrderedDict

from bokeh.charts import Donut
from bokeh.plotting import show, output_file
bk.output_notebook()



In [89]:

    
sectorsScope3 = OrderedDict()
for s in sectors:
    sectorsScope3[s]  = []
    for c in categories:
         sectorsScope3[s].append(cgs.loc[c,s]["CO2eTons"])



In [94]:

    
donut = Donut(sectorsScope3, categories, filename="Scope3CatsSectors.html")
donut.show()









    



Wrote donut.html



In [673]:

    
pscope3 = gm.get_scope3(p2, 2010)

	Reporting Period From	Reporting Period To	CC9.1a. Please break down your total gross global Scope 1 emissions by country/region CC9.1a. Country/Region	CC9.1a. Scope 1 metric tonnes CO2e
0	1 Jan 2013	31 Dec 2013	Argentina	34591
1	1 Jan 2013	31 Dec 2013	Belgium	25267
2	1 Jan 2013	31 Dec 2013	Bosnia and Herzegovina	268
3	1 Jan 2013	31 Dec 2013	Brazil	80239
4	1 Jan 2013	31 Dec 2013	China	422748

		Scope 2
Country	GICS Sector
Australia	Consumer Discretionary	0.294324
	Consumer Staples	5.398313
	Energy	0.596422
	Financials	2.022157
	Health Care	0.148036