In [1]:
import pandas as pd

In [13]:
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")

In [11]:
companies = scopes12.index.levels[0].tolist()

In [12]:
len(companies)


Out[12]:
361

In [17]:
p = pd.read_pickle("../CDPdata/sheet35_2014.pkl")
pcols = p.columns.values

In [16]:
isin = p[["Organisation","ISIN", "Ticker"]]
isin=isin[isin["ISIN"].notnull()].drop_duplicates("Organisation")
isin= isin.set_index("Organisation")
isin.head()


Out[16]:
ISIN Ticker
Organisation
Volkswagen AG DE0007664039 VOW3 GR
Vontobel Holding AG CH0012335540 VONN SW
Vopak NL0009432491 VPK NA
VP Bank Gruppe LI0010737216 VPB SW
W.W. Grainger, Inc. US3848021040 GWW US

In [19]:
isin.to_pickle("../CDPdata/2014orginfos.pkl")
# isin = pd.read_pickle("../CDPdata/2014isin.pkl")

In [14]:
isinsubset = isin.loc[companies]

In [22]:
# isinsubset.to_pickle("../CDPdata/isin_subset.pkl")
len(isinsubset.index.tolist())


Out[22]:
361

In [11]:
tsubset = pd.read_pickle("../CDPdata/tickers_subset.pkl")
tsubset.to_excel("../CDPdata/tickers_subset.xlsx")

In [17]:
isinsubset.to_excel("../CDPdata/isin_subset.xlsx")

In [26]:
revenues = pd.read_csv("../CDPdata/subset_revenues.csv")

In [27]:
# fyr 3, fyear 2013 means the year ending is March, 2014
revenues = revenues.rename(columns={"fyear":"year", "isin":"ISIN"})
revenues = revenues.set_index(["ISIN", "year"])
revenues.head()
len(revenues.index.levels[0])


Out[27]:
189

In [16]:
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
scopes12.head()


Out[16]:
Scope 1 Scope 2 has Scope 1and2
Organisation year
ACC 2010 11888436.00 493000 1
2011 13717736.46 473744 1
2012 15809662.00 625162 1
2013 15383520.00 565856 1
2014 15146444.00 511476 1

In [18]:
scopes12.loc["Gold Fields Limited"]


Out[18]:
Scope 1 Scope 2 has Scope 1and2
year
2010 1308764.00 5093511.00 1
2011 1377194.00 5164897.00 1
2012 1009661.76 4835939.60 1
2013 1220651.24 4607613.11 1
2014 420296.00 814968.00 1

In [23]:
isinsubset = isinsubset.reset_index()
isinsubset = isinsubset.rename(columns={"index":"Organisation"})
isinsubset = isinsubset.set_index("Organisation")
isinsubset.head()


Out[23]:
ISIN
Organisation
ACC INE012A01025
ACEA SpA IT0001207098
AECI Ltd Ord ZAE000000220
AFLAC Incorporated US0010551028
AMG Advanced Metallurgical Group NV NL0000888691

In [24]:
ghgrevenues = scopes12.join(isinsubset, how="outer")
ghgrevenues.reset_index(inplace=True)
ghgrevenues.set_index(["ISIN", "year"], inplace=True)
ghgrevenues.head()


Out[24]:
Organisation Scope 1 Scope 2 has Scope 1and2
ISIN year
INE012A01025 2010 ACC 11888436.00 493000 1
2011 ACC 13717736.46 473744 1
2012 ACC 15809662.00 625162 1
2013 ACC 15383520.00 565856 1
2014 ACC 15146444.00 511476 1

In [25]:
ghgrevenues.loc["ZAE000018123"]


Out[25]:
Organisation Scope 1 Scope 2 has Scope 1and2
year
2010 Gold Fields Limited 1308764.00 5093511.00 1
2011 Gold Fields Limited 1377194.00 5164897.00 1
2012 Gold Fields Limited 1009661.76 4835939.60 1
2013 Gold Fields Limited 1220651.24 4607613.11 1
2014 Gold Fields Limited 420296.00 814968.00 1

In [79]:
# len(revenues.index.levels[0])
# ghgrevs = revenues.join(ghgrevenues)
def add_total_and_intensity(p):
    p["1and2 total"] = p["Scope 1"] + p["Scope 2"]
    p["1and2 intensity"] = p['1and2 total']/p["revt"]
    return p

In [100]:
ghgrevs.to_pickle("../CDPdata/ghgrevenues.pkl")
ghgrevs[["1and2 total", "1and2 intensity", "cogs", "revt", "Organisation"]].head()


Out[100]:
1and2 total 1and2 intensity cogs revt Organisation
ISIN year
AT0000746409 2010 2688664 840.743211 1729.712 3197.961 VERBUND AG
2011 3687587 975.965223 2344.788 3778.400 VERBUND AG
2012 4503481 1444.617059 1430.076 3117.422 VERBUND AG
2013 4140240 1371.871635 1448.004 3017.950 VERBUND AG
AU000000WOW2 2010 2968261 57.419503 37593.500 51694.300 Woolworths Limited

In [32]:
rs = revenues.drop_duplicates(["sedol", "datadate"])
rs.loc["ZAE000018123"]


Out[32]:
gvkey indfmt datafmt consol popsrc fyr datadate cogs revt teq exchg sedol conm costat fic incorp
year
2010 17947 INDL HIST_STD C I 6 20100630 12295.1 31565.3 45448.9 177 6280215 GOLD FIELDS LTD A ZAF NaN
2010 17947 INDL HIST_STD C I 12 20101231 10869.9 18308.1 46622.5 177 6280215 GOLD FIELDS LTD A ZAF NaN
2011 17947 INDL HIST_STD C I 12 20111231 22804.0 41876.8 48061.5 177 6280215 GOLD FIELDS LTD A ZAF NaN
2012 17947 INDL HIST_STD C I 12 20121231 15989.9 28915.8 53157.4 177 6280215 GOLD FIELDS LTD A ZAF NaN
2013 17947 INDL HIST_STD C I 12 20131231 17961.4 27900.6 41827.5 177 6280215 GOLD FIELDS LTD A ZAF NaN

In [2]:
xls = pd.ExcelFile('../CDPdata/northamerica compustat.xlsx')

In [40]:
p = xls.parse(0)

In [86]:
tickerrevs = p.rename(columns={"Current ISO Country Code - Incorporation": "Country","Data Year - Fiscal": "year",
                               "Revenue - Total": "revt", "Ticker Symbol": "Ticker",
                               "Cost of Goods Sold": "cogs"})
tickerrevs = tickerrevs.reset_index().set_index(["Ticker", "year"])
tickerrevs.drop("index", 1, inplace=True)
ghgrevs_nam = tickerrevs.join(ghgs)
ghgrevs_nam = ghgrevs_nam.reset_index().set_index(["ISIN", "year"])
ghgrevs_nam = add_total_and_intensity(ghgrevs_nam)

In [85]:
# scopes12 = scopes12.reset_index().set_index(["Organisation"])
# ghgs = scopes12.join(orginfos)
#ghgs.reset_index(inplace=True)
#ghgs["Ticker"] = ghgs["Ticker"].apply(lambda x: str(x).split(" ")[0])
# ghgs = ghgs.reset_index().set_index(["Ticker","year"])
ghgs.head()
# scopes12.head()


Out[85]:
Organisation Scope 1 Scope 2 has Scope 1and2 ISIN
Ticker year
ACC 2010 ACC 11888436.00 493000 1 INE012A01025
2011 ACC 13717736.46 473744 1 INE012A01025
2012 ACC 15809662.00 625162 1 INE012A01025
2013 ACC 15383520.00 565856 1 INE012A01025
2014 ACC 15146444.00 511476 1 INE012A01025

In [93]:
ghgrevs_gl = pd.read_pickle("../CDPdata/ghgrevenues.pkl")
ghgrevs_gl.columns


Out[93]:
Index([u'gvkey', u'indfmt', u'datafmt', u'consol', u'popsrc', u'fyr', u'datadate', u'cogs', u'revt', u'teq', u'exchg', u'sedol', u'conm', u'costat', u'fic', u'incorp', u'Organisation', u'Scope 1', u'Scope 2', u'has Scope 1and2', u'1and2 total', u'1and2 intensity'], dtype='object')

In [95]:
keepcols = ["cogs", "revt", "Organisation", "Scope 1", "Scope 2", "has Scope 1and2", "1and2 total", "1and2 intensity"]
ghgrevs = pd.concat([ghgrevs_gl[keepcols], ghgrevs_nam[keepcols]])

In [102]:
len(ghgrevs.index.levels[0])
# ghgrevs.head()


Out[102]:
320

In [ ]: