In [1]:
    
import pandas as pd
from collections import OrderedDict
from bokeh.charts import Bar
from bokeh.plotting import output_notebook, show
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d
    
    
In [24]:
    
output_notebook()
    
In [2]:
    
import targetsmosh as tm
    
In [20]:
    
reload(tm)
    
    Out[20]:
In [ ]:
    
vcounts2013 = tm.get_vcounts(p, 2013)
tm.summary(vcounts2013, p)
    
In [4]:
    
p = pd.read_pickle('../CDPdata/sheet12_2014.pkl')
    
In [4]:
    
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
scopes12_5 = pd.read_pickle("../CDPdata/2010to2014scopes12fiveyrs.pkl")
companies = scopes12.index.levels[0].tolist()
companies5 = scopes12_5.index.levels[0].tolist()
    
In [3]:
    
s12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")
s12cos = s12_c.index.levels[0].tolist()
    
In [11]:
    
targets_yr={}
for yr in range(2010,2015):
    p = pd.read_pickle("../CDPdata/sheet"+str(tm.deets[yr]["summary"]["sheet"])+"_" + str(yr) + ".pkl")
    targets_yr[yr] = tm.get_targets(p, yr)
    targets_yr[yr]["year"] = yr
    
In [12]:
    
# oh i need to clean up 2010 for realz
counts2010 = targets_yr[2010]["Organisation"].value_counts()
multiples2010 = counts2010[counts2010>1].index.tolist()
# targets_yr[2010]["target type"].value_counts()
    
In [13]:
    
targets_yr[2010].drop_duplicates("Organisation",inplace=True)
    
In [14]:
    
len(multiples2010)
targets_yr[2010] = targets_yr[2010][targets_yr[2010]["Organisation"].isin(s12cos)]
targets_yr[2010].to_pickle("../CDPdata/2010targets_cleaned.pkl")
    
In [15]:
    
alltargets = pd.concat(targets_yr.values())
targets = alltargets[alltargets["Organisation"].isin(s12cos)]
len(targets) # 7861
    
    Out[15]:
In [16]:
    
targets.to_pickle("../CDPdata/targets_all.pkl")
len(targets.index.value_counts().index) # 2090
    
    Out[16]:
In [27]:
    
targets.head()
    
    Out[27]:
In [18]:
    
targets.set_index("year", inplace=True)
    
In [35]:
    
# targets.reset_index(inplace=True)
targets["abs count"] = 1
targets["int count"] = 1
target_yrgs = targets.groupby("year")
# target_yrgs.describe()
    
    
In [37]:
    
t_sums = target_yrgs.sum()
t_sums["abs percent"] = t_sums["has absolute"]/t_sums["abs count"]
t_sums["int percent"] = t_sums["has intensity"]/t_sums["int count"]
    
In [38]:
    
t_sums
    
    Out[38]:
In [20]:
    
valuest_summary = OrderedDict()
valuest_summary["% With Absolute Targets"] = abs_percents
valuest_summary["% With Intensity Targets"] = int_percents
    
In [21]:
    
years = []
for yr in range(2010,2015):
    years.append(str(yr))
    
In [22]:
    
colors = brewer["Spectral"][3]
bar = Bar(valuest_summary, years, title="Targets Set for Companies That Reported 5 Years", filename="scope12_disclosure.html",
           stacked=False, ylabel="% of Companies",
           xlabel="Year", legend ="bottom_right")
    
In [25]:
    
show(bar)
    
    
In [6]:
    
targets5 = pd.read_pickle("../CDPdata/2010to2014targets5yrs.pkl")
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl").reset_index().set_index("Organisation")
    
In [7]:
    
# combine with orginfos in order to get sector and country numbers
targets5= targets5.reset_index().set_index(["Organisation"])
    
In [8]:
    
len(set(targets5.index)) # 654
    
    Out[8]:
In [9]:
    
targets5 = targets5.join(orginfos[["Country", "GICS Sector"]])
targets5["GICS Sector"].replace("Banks", "Financials", inplace=True)
targets5 = targets5.reset_index().set_index("Organisation").sort_index()
    
In [10]:
    
targets5["has absolute"] = targets5["has absolute"].apply(lambda(x): int(x))
targets5["has intensity"] = targets5["has intensity"].apply(lambda(x): int(x))
    
In [11]:
    
targets5_c = targets5.groupby(["year","Country"])
targets5_s = targets5.groupby(["year","GICS Sector"])
targets5_csums = targets5_c.sum()
targets5_ssums = targets5_s.sum()
    
In [12]:
    
scopes12_5["GICS Sector"].replace("Banks", "Financials", inplace=True)
scopes12_5 = scopes12_5.reset_index().set_index("Organisation").sort_index()
    
In [13]:
    
scopes12_5cd = scopes12_5.groupby(["year","Country"]).describe().reset_index()
scopes12_5sd = scopes12_5.groupby(["year","GICS Sector"]).describe().reset_index()
    
In [15]:
    
scopes12_5sdis = scopes12_5sd[scopes12_5sd["level_2"]=="count"][["year", "GICS Sector","scope1", "scope2"]]
scopes12_5sdis.set_index(["year", "GICS Sector"],inplace=True)
scopes12_5cdis = scopes12_5cd[scopes12_5cd["level_2"]=="count"][["year", "Country","scope1", "scope2"]]
scopes12_5cdis.set_index(["year", "Country"],inplace=True)
    
In [16]:
    
scopes12_5cdis.head()
targets5_csums.head()
# targets5_csums.loc[2010,"USA"]
    
    Out[16]:
In [17]:
    
def mergest(s, t):
    st = {}
    for yr in range(2010, 2015):
        st[yr] = s.loc[yr][["scope1","scope2"]]
        tyr = t.loc[yr]
        st[yr] = st[yr].join(tyr[["has absolute", "has intensity"]])
        st[yr].fillna(0,inplace=True)
        st[yr]["year"] = yr
        st[yr].sort("scope1", ascending = 0, inplace=True)
    sts = pd.concat(st.values())
    sts["abspercent"]  = sts["has absolute"]/sts["scope1"]*100.0
    sts["intpercent"]  = sts["has intensity"]/sts["scope1"]*100.0
    return sts
    
In [26]:
    
scopestargets_c = mergest(scopes12_5cdis, targets5_csums).reset_index().set_index(["year", "Country"]).sort_index()
scopestargets_s = mergest(scopes12_5sdis, targets5_ssums).reset_index().set_index(["year", "GICS Sector"]).sort_index()
    
In [56]:
    
sectors = scopestargets_s.index.levels[1].tolist()
stvalues_s = OrderedDict()
for i in scopestargets_s.index.levels[0]:
    stvalues_s[str(i)] = scopestargets_s.loc[i]["intpercent"].values
    
In [72]:
    
stvalues_c = OrderedDict()
for i in scopestargets_c.index.levels[0]:
    yrvalues = []
    for c in countries:
        yrvalues.append(scopestargets_c.loc[i,c]["intpercent"]) 
    stvalues_c[str(i)] = yrvalues
    
In [69]:
    
countries = scopestargets_c.loc[2010].sort("scope1", ascending =0).index.tolist()[0:9]
# scopestargets_s.loc["Consumer Discretionary"]["abspercent"].values
    
In [75]:
    
#bar = Bar(stvalues_s, sectors, title="Intensity Targets by Sector", filename="scope12_disclosure.html",
#           stacked=False, ylabel="% of Companies", xlabel="Sector", legend ="top_right")
bar = Bar(stvalues_c, countries, title="Intensity Targets by Country", filename="scope12_disclosure.html",
           stacked=False, ylabel="% of Companies", xlabel="Countries", legend ="top_right")
bar.y_range.end = 100
    
In [76]:
    
barci = bar
show(bar)
    
    
In [29]:
    
xls = pd.ExcelFile('../CDPdata/Investor CDP 2013_Public Data.xlsx')
    
In [36]:
    
xls2012 = pd.ExcelFile('../CDPdata/Investor CDP 2012_Public Data.xlsx')
    
In [52]:
    
p = xls2012.parse(14)
p2 = xls2012.parse(16)
    
In [56]:
    
p.to_pickle('../CDPdata/2012sheet14.pkl')
p2.to_pickle('../CDPdata/2012sheet16.pkl')
    
In [55]:
    
# 2014
p.columns[12:20]
    
    Out[55]:
In [24]:
    
p2[p2.columns[15]].value_counts()[0:10]
    
In [16]:
    
p2[p2.columns[18]].value_counts()
    
    Out[16]:
In [ ]: