In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
xls = pd.ExcelFile('../CDPdata/Investor CDP 2014_Public Data.xlsx')

In [4]:
sheet_names = pd.Series(xls.sheet_names)

In [5]:
sheet_names[30:40]


Out[5]:
30    CC7. Emissions Methodology
31                         CC7.1
32                         CC7.2
33                         CC7.3
34                         CC7.4
35           CC8. Emissions Data
36                        CC8.4a
37                         CC8.5
38                        CC8.6a
39                        CC8.6b
dtype: object

In [3]:
p = xls.parse(43)

In [5]:
# pickle some data?
p.to_pickle('../CDPdata/sheet43_2014.pkl')

In [3]:
p = pd.read_pickle('../CDPdata/sheet43_2014.pkl')

In [4]:
pcols = p.columns.values
p[pcols[14:25]].head()


Out[4]:
Reporting Period From Reporting Period To CC9.1a. Please break down your total gross global Scope 1 emissions by country/region CC9.1a. Country/Region CC9.1a. Scope 1 metric tonnes CO2e  
0 1 Jan 2013 31 Dec 2013 Argentina 34591
1 1 Jan 2013 31 Dec 2013 Belgium 25267
2 1 Jan 2013 31 Dec 2013 Bosnia and Herzegovina 268
3 1 Jan 2013 31 Dec 2013 Brazil 80239
4 1 Jan 2013 31 Dec 2013 China 422748

In [7]:
# Emissions are not relevant, No emissions from this source, Emissions excluded due to a recent acquisition 
p[pcols[16]].value_counts()
# excuses_s12 = p[["Organisation"]+pcols[17:19].tolist()]

In [20]:
excuses_s12.to_pickle("../CDPdata/excuses_s12.pkl")

get emissions data


In [4]:
import ghgmosh as gm

In [803]:
reload(gm)


Out[803]:
<module 'ghgmosh' from 'ghgmosh.py'>

get scope1 and scope2 totals by country and sector


In [689]:
pscope1 = gm.get_scope1or2(p, 1, 2010)
# scope1totals = gm.get_totals(pscope1, "Country", "GICS Sector", 20)

In [440]:
pscope2 = pscope2[1:]
# pscope2.head()

In [670]:
# get scope2 totals by country and sector
pscope2 = gm.get_scope1or2(p, 2, 2010)
# scope2totals = gm.get_totals(pscope2, "Country", "GICS Sector", 20)

In [108]:
# pickle some data
scope1totals.to_pickle('mydata/2014scope1totals.pkl')
scope2totals.to_pickle('mydata/2014scope2totals.pkl')

In [176]:
scope1total = pd.read_pickle('mydata/2013scope1totals.pkl')
scope2total = pd.read_pickle('mydata/2013scope2totals.pkl')

In [539]:
scope2total.head()


Out[539]:
Scope 2
Country GICS Sector
Australia Consumer Discretionary 0.294324
Consumer Staples 5.398313
Energy 0.596422
Financials 2.022157
Health Care 0.148036

In [560]:
s1data = gm.write_json(scope1total, "scope1countrysector.json")
s2data = gm.write_json(scope2total, "scope2countrysector.json")

get scope3 emissions


In [671]:
p2 = xls.parse(49)

In [3]:
# p2.to_pickle('../CDPdata/sheet49_2010.pkl')
p2 = pd.read_pickle("../CDPdata/sheet68_2014.pkl")

In [30]:
catgroups = p2.groupby([p2.columns[gm.scopecols[2014][3]["cat"]],'GICS Sector'])
cats = catgroups.groups.keys()
# catgroups.head()

In [50]:
cg_sums = catgroups.sum()
cg_sums.reset_index(inplace=True)
cg_sums = cg_sums.rename( columns = { cg_sums.columns[0]: "Scope3Cat", "GICS Sector": "Sector",
                                    cg_sums.columns[4]: "CO2eTons" } )
cg_sums = cg_sums[["Scope3Cat", "Sector", "CO2eTons"]]

In [87]:
categories = list(set(cg_sums.Scope3Cat.values.tolist()))
sectors = list(set(cg_sums.Sector.values.tolist()))
# categories
cgs = cg_sums.set_index(['Scope3Cat', 'Sector'])

In [55]:
from collections import OrderedDict

from bokeh.charts import Donut
from bokeh.plotting import show, output_file
bk.output_notebook()

In [89]:
sectorsScope3 = OrderedDict()
for s in sectors:
    sectorsScope3[s]  = []
    for c in categories:
         sectorsScope3[s].append(cgs.loc[c,s]["CO2eTons"])

In [94]:
donut = Donut(sectorsScope3, categories, filename="Scope3CatsSectors.html")
donut.show()


Wrote donut.html

In [673]:
pscope3 = gm.get_scope3(p2, 2010)