In [1]:
import pandas as pd
import numpy as np
import json
In [2]:
xls = pd.ExcelFile('../CDPdata/Investor CDP 2014_Public Data.xlsx')
In [4]:
sheet_names = pd.Series(xls.sheet_names)
In [5]:
sheet_names[30:40]
Out[5]:
In [3]:
p = xls.parse(43)
In [5]:
# pickle some data?
p.to_pickle('../CDPdata/sheet43_2014.pkl')
In [3]:
p = pd.read_pickle('../CDPdata/sheet43_2014.pkl')
In [4]:
pcols = p.columns.values
p[pcols[14:25]].head()
Out[4]:
In [7]:
# Emissions are not relevant, No emissions from this source, Emissions excluded due to a recent acquisition
p[pcols[16]].value_counts()
# excuses_s12 = p[["Organisation"]+pcols[17:19].tolist()]
In [20]:
excuses_s12.to_pickle("../CDPdata/excuses_s12.pkl")
In [4]:
import ghgmosh as gm
In [803]:
reload(gm)
Out[803]:
In [689]:
pscope1 = gm.get_scope1or2(p, 1, 2010)
# scope1totals = gm.get_totals(pscope1, "Country", "GICS Sector", 20)
In [440]:
pscope2 = pscope2[1:]
# pscope2.head()
In [670]:
# get scope2 totals by country and sector
pscope2 = gm.get_scope1or2(p, 2, 2010)
# scope2totals = gm.get_totals(pscope2, "Country", "GICS Sector", 20)
In [108]:
# pickle some data
scope1totals.to_pickle('mydata/2014scope1totals.pkl')
scope2totals.to_pickle('mydata/2014scope2totals.pkl')
In [176]:
scope1total = pd.read_pickle('mydata/2013scope1totals.pkl')
scope2total = pd.read_pickle('mydata/2013scope2totals.pkl')
In [539]:
scope2total.head()
Out[539]:
In [560]:
s1data = gm.write_json(scope1total, "scope1countrysector.json")
s2data = gm.write_json(scope2total, "scope2countrysector.json")
In [671]:
p2 = xls.parse(49)
In [3]:
# p2.to_pickle('../CDPdata/sheet49_2010.pkl')
p2 = pd.read_pickle("../CDPdata/sheet68_2014.pkl")
In [30]:
catgroups = p2.groupby([p2.columns[gm.scopecols[2014][3]["cat"]],'GICS Sector'])
cats = catgroups.groups.keys()
# catgroups.head()
In [50]:
cg_sums = catgroups.sum()
cg_sums.reset_index(inplace=True)
cg_sums = cg_sums.rename( columns = { cg_sums.columns[0]: "Scope3Cat", "GICS Sector": "Sector",
cg_sums.columns[4]: "CO2eTons" } )
cg_sums = cg_sums[["Scope3Cat", "Sector", "CO2eTons"]]
In [87]:
categories = list(set(cg_sums.Scope3Cat.values.tolist()))
sectors = list(set(cg_sums.Sector.values.tolist()))
# categories
cgs = cg_sums.set_index(['Scope3Cat', 'Sector'])
In [55]:
from collections import OrderedDict
from bokeh.charts import Donut
from bokeh.plotting import show, output_file
bk.output_notebook()
In [89]:
sectorsScope3 = OrderedDict()
for s in sectors:
sectorsScope3[s] = []
for c in categories:
sectorsScope3[s].append(cgs.loc[c,s]["CO2eTons"])
In [94]:
donut = Donut(sectorsScope3, categories, filename="Scope3CatsSectors.html")
donut.show()
In [673]:
pscope3 = gm.get_scope3(p2, 2010)