In [1]:
import pandas as pd
In [13]:
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
In [11]:
companies = scopes12.index.levels[0].tolist()
In [12]:
len(companies)
Out[12]:
In [17]:
p = pd.read_pickle("../CDPdata/sheet35_2014.pkl")
pcols = p.columns.values
In [16]:
isin = p[["Organisation","ISIN", "Ticker"]]
isin=isin[isin["ISIN"].notnull()].drop_duplicates("Organisation")
isin= isin.set_index("Organisation")
isin.head()
Out[16]:
In [19]:
isin.to_pickle("../CDPdata/2014orginfos.pkl")
# isin = pd.read_pickle("../CDPdata/2014isin.pkl")
In [14]:
isinsubset = isin.loc[companies]
In [22]:
# isinsubset.to_pickle("../CDPdata/isin_subset.pkl")
len(isinsubset.index.tolist())
Out[22]:
In [11]:
tsubset = pd.read_pickle("../CDPdata/tickers_subset.pkl")
tsubset.to_excel("../CDPdata/tickers_subset.xlsx")
In [17]:
isinsubset.to_excel("../CDPdata/isin_subset.xlsx")
In [26]:
revenues = pd.read_csv("../CDPdata/subset_revenues.csv")
In [27]:
# fyr 3, fyear 2013 means the year ending is March, 2014
revenues = revenues.rename(columns={"fyear":"year", "isin":"ISIN"})
revenues = revenues.set_index(["ISIN", "year"])
revenues.head()
len(revenues.index.levels[0])
Out[27]:
In [16]:
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
scopes12.head()
Out[16]:
In [18]:
scopes12.loc["Gold Fields Limited"]
Out[18]:
In [23]:
isinsubset = isinsubset.reset_index()
isinsubset = isinsubset.rename(columns={"index":"Organisation"})
isinsubset = isinsubset.set_index("Organisation")
isinsubset.head()
Out[23]:
In [24]:
ghgrevenues = scopes12.join(isinsubset, how="outer")
ghgrevenues.reset_index(inplace=True)
ghgrevenues.set_index(["ISIN", "year"], inplace=True)
ghgrevenues.head()
Out[24]:
In [25]:
ghgrevenues.loc["ZAE000018123"]
Out[25]:
In [79]:
# len(revenues.index.levels[0])
# ghgrevs = revenues.join(ghgrevenues)
def add_total_and_intensity(p):
p["1and2 total"] = p["Scope 1"] + p["Scope 2"]
p["1and2 intensity"] = p['1and2 total']/p["revt"]
return p
In [100]:
ghgrevs.to_pickle("../CDPdata/ghgrevenues.pkl")
ghgrevs[["1and2 total", "1and2 intensity", "cogs", "revt", "Organisation"]].head()
Out[100]:
In [32]:
rs = revenues.drop_duplicates(["sedol", "datadate"])
rs.loc["ZAE000018123"]
Out[32]:
In [2]:
xls = pd.ExcelFile('../CDPdata/northamerica compustat.xlsx')
In [40]:
p = xls.parse(0)
In [86]:
tickerrevs = p.rename(columns={"Current ISO Country Code - Incorporation": "Country","Data Year - Fiscal": "year",
"Revenue - Total": "revt", "Ticker Symbol": "Ticker",
"Cost of Goods Sold": "cogs"})
tickerrevs = tickerrevs.reset_index().set_index(["Ticker", "year"])
tickerrevs.drop("index", 1, inplace=True)
ghgrevs_nam = tickerrevs.join(ghgs)
ghgrevs_nam = ghgrevs_nam.reset_index().set_index(["ISIN", "year"])
ghgrevs_nam = add_total_and_intensity(ghgrevs_nam)
In [85]:
# scopes12 = scopes12.reset_index().set_index(["Organisation"])
# ghgs = scopes12.join(orginfos)
#ghgs.reset_index(inplace=True)
#ghgs["Ticker"] = ghgs["Ticker"].apply(lambda x: str(x).split(" ")[0])
# ghgs = ghgs.reset_index().set_index(["Ticker","year"])
ghgs.head()
# scopes12.head()
Out[85]:
In [93]:
ghgrevs_gl = pd.read_pickle("../CDPdata/ghgrevenues.pkl")
ghgrevs_gl.columns
Out[93]:
In [95]:
keepcols = ["cogs", "revt", "Organisation", "Scope 1", "Scope 2", "has Scope 1and2", "1and2 total", "1and2 intensity"]
ghgrevs = pd.concat([ghgrevs_gl[keepcols], ghgrevs_nam[keepcols]])
In [102]:
len(ghgrevs.index.levels[0])
# ghgrevs.head()
Out[102]:
In [ ]: