In [ ]:
In [1]:
import json, pandas as pd,re
import DeriveFinalResultSet as DRS
import cufflinks as cf # this is necessary to link pandas to plotly
cf.go_offline()
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
import ClassiferHelperAPI as CH, PopulationEstimatorAPI as PE
import importlib, re
importlib.reload(CH)
importlib.reload(DRS)
importlib.reload(PE)
import MarkRecapHelper as MR
importlib.reload(MR)
import JobsMapResultsFilesToContainerObjs as ImageMap
importlib.reload(ImageMap)
import plotly.graph_objs as go
import random
In [2]:
# Input files: EXIF data, image-id -> annotation-id map, annotation features.
# The GGR paths are assigned first and then immediately replaced by the "full"
# paths, so only the second set is in effect after this cell runs.
inExifFl = "../data/ggr_gid_uuid_exif_ftr_map.json"
inGidAidMapFl = "../data/ggr_uuid_annot_uuid_map.json"
inAidFtrFl = "../data/ggr_annot_uuid_ftr_map.json"

inExifFl = "../data/imgs_exif_data_full.json"
inGidAidMapFl = "../data/full_gid_aid_map.json"
inAidFtrFl = "../data/full_aid_features.json"
In [88]:
# Build the GZC feature vector through ClassiferHelperAPI; the fifth argument
# names "../feature_files/GZC_FEATURE_VECTOR.csv".
# NOTE(review): the result is bound to the generic name `df`, which the GGR
# feature-vector cell also assigns - whichever cell ran last wins.
df = CH.construct_feature_vctr_fl(
    "../data/GZC_exifs_beauty_full.json",
    None,
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../feature_files/GZC_FEATURE_VECTOR.csv",
    ['arousal', 'contrast', 'dominance', 'pleasure', 'symmetry']
    + ['height', 'width']
    + ['hsv_itten_std_h', 'hsv_itten_std_s', 'hsv_itten_std_v']
    + ['lat', 'long']
    + ['orientation']
    + ['day', 'hour']
    + ['has_zebra'],
)
In [78]:
# Build the GGR feature vector through ClassiferHelperAPI; the fifth argument
# names "../feature_files/GGR_FEATURE_VECTOR.csv".
# NOTE(review): the result is bound to the generic name `df`, which the GZC
# feature-vector cell also assigns - whichever cell ran last wins.
df = CH.construct_feature_vctr_fl(
    "../data/GGR_bty_exif_combined.json",
    None,
    "../data/ggr_uuid_annot_uuid_map.json",
    "../data/ggr_annot_uuid_ftr_map.json",
    "../feature_files/GGR_FEATURE_VECTOR.csv",
    ['arousal', 'contrast', 'dominance', 'pleasure', 'symmetry']
    + ['height', 'width']
    + ['hsv_itten_std_h', 'hsv_itten_std_s', 'hsv_itten_std_v']
    + ['lat', 'long']
    + ['orientation']
    + ['day', 'hour']
    + ['has_zebra'],
)
In [5]:
# Synthetic experiment for the 'elastic_net' algorithm on the 'beauty' attribute,
# swept over range(20,31), with regrArgs supplying the per-algorithm arguments.
# NOTE(review): the first argument is False here and True in the logistic runs -
# presumably a classifier/regressor switch; confirm against PopulationEstimatorAPI.
# NOTE(review): regrArgs is only defined in a cell further down, so this cell
# fails on a fresh kernel when the notebook is run top to bottom.
PE.runSyntheticExpts(False, ['elastic_net'], ['beauty'], range(20,31), regrArgs)
Out[5]:
In [8]:
# Synthetic experiment for the 'logistic' algorithm on the 'beauty' attribute,
# swept over range(5,60); the result is kept as df_logistic_gzc.
# NOTE(review): clfArgs is only defined in a cell further down, so this cell
# fails on a fresh kernel when the notebook is run top to bottom.
df_logistic_gzc = PE.runSyntheticExpts(True, ['logistic'], ['beauty'], range(5,60), clfArgs)
In [221]:
# Same logistic/'beauty' experiment restricted to the single value 25
# (range(25,26)); the result is only displayed, not stored.
# NOTE(review): depends on clfArgs from a cell further down.
PE.runSyntheticExpts(True, ['logistic'], ['beauty'], range(25,26), clfArgs)
Out[221]:
In [4]:
# Per-algorithm argument dictionaries handed to PE.runSyntheticExpts as its last
# argument. Keys are algorithm names, values are keyword-style settings.
# NOTE(review): how PopulationEstimatorAPI consumes these is not visible here.
clfArgs = dict(
    dummy=dict(strategy='most_frequent'),
    bayesian=dict(fit_prior=True),
    logistic=dict(penalty='l2'),
    svm=dict(kernel='rbf', probability=True),
    dtree=dict(criterion='entropy'),
    random_forests=dict(n_estimators=10),
    ada_boost=dict(n_estimators=50),
)

# Every regressor gets the same single setting; each entry is its own dict.
# NOTE(review): 'svr' and 'dtree_regressor' also receive fit_intercept - confirm
# the underlying estimators accept that argument.
regrArgs = {
    name: {'fit_intercept': True}
    for name in ('linear', 'ridge', 'lasso', 'elastic_net', 'svr', 'dtree_regressor')
}
In [131]:
def contrib_extractor(x):
    """Return the '<digits>,<capital letter>' field that follows 'GGR,' in x.

    Only the first regex match is used. Raises IndexError when x contains no
    'GGR,<digits>,<capital letter>,' sequence.
    """
    matches = re.findall(r'GGR,([0-9]+,[A-Z]),.*', x)
    return matches[0]
# Load the EXIF JSON selected by inExifFl and map every image key to a
# one-element list holding its extracted contributor tag.
# NOTE(review): contrib_extractor only matches 'GGR,...' contributor strings, while
# inExifFl was last pointed at "../data/imgs_exif_data_full.json" - verify that
# this file really carries GGR-style contributors, otherwise IndexError is raised.
with open(inExifFl, "r") as exif_fl:
    exif_obj = json.load(exif_fl)

gidContribDct = {
    gid_uuid: [contrib_extractor(exif_entry['contributor'])]
    for gid_uuid, exif_entry in exif_obj.items()
}
In [132]:
# Spot-check: run the extractor on the contributor string of one hard-coded image key.
# Raises KeyError if that key is absent from the EXIF file currently loaded.
contrib_extractor(exif_obj['a7c6379f-ab06-b409-9836-3f696c73e86e']['contributor'])
Out[132]:
In [228]:
# Guard: stop when MarkRecapHelper (MR) is configured with a different MODE than
# the one this notebook sets.
# The comparison is explicit instead of `assert`-based because assert statements
# are removed when Python runs with -O, which would silently disable the guard.
# `sys` is imported here because the notebook otherwise only imports it in a
# cell that appears after this one.
# NOTE(review): MODE itself is still defined in a later cell; that cell must have
# run before this one.
import sys

if MR.MODE != MODE:
    print("Mode mismatch")
    sys.exit(-2)
In [227]:
# Mode this notebook expects; the mode-check cell compares it with MR.MODE.
# NOTE(review): 'blah' looks like a placeholder - with this value the check exits
# unless MarkRecapHelper.MODE is also 'blah'.
MODE = 'blah'
# NOTE(review): sys is used by the mode-check cell that appears earlier in the
# notebook; this import belongs in the first import cell.
import sys
In [208]:
# NOTE(review): scratch cell - neither `a` nor `lasdas` is defined anywhere in the
# visible notebook, so running it raises NameError. Candidate for deletion.
if len(a[0]):
lasdas
In [26]:
# Signed percent deviation of the 'zebras' column from the reference count 2673:
# (value - 2673) * 100 / 2673, written as a division by 26.73.
# Adds the column to df_logistic_gzc in place.
df_logistic_gzc['zebra_error'] = (df_logistic_gzc['zebras'] - 2673) / 26.73
In [65]:
def buildErrPlots(clfOrRgr, thresholdMeth=False, randomShare=False):
    """Load per-algorithm result CSVs and return their percent-error columns.

    Despite the name, nothing is plotted: the merged error table is returned and
    callers build their own figures from it. (The plotting code that used to
    follow the ``return`` was unreachable and has been removed.)

    Parameters
    ----------
    clfOrRgr : str
        'clf' reads the 'logistic' and 'ada_boost' result files; any other value
        reads the 'linear' and 'elastic_net' ones.
    thresholdMeth : bool
        True reads "<alg>_beauty_thresholded.csv" and keys the table on the
        "threshold" column; False keys it on "num_images".
    randomShare : bool
        Only consulted when thresholdMeth is False: True reads
        "<alg>_beauty_kSharesRandom.csv", False "<alg>_beauty_kSharesGGR.csv".

    Returns
    -------
    pandas.DataFrame
        Indexed by the key column and restricted to the columns whose name
        contains 'err'. Entries whose absolute value is 100 or more are
        replaced by missing values.

    Notes
    -----
    Files are read from "../FinalResults/" relative to the working directory.
    Percent error is (predicted - actual) * 100 / actual with reference counts
    all=3620, zebras=2673, giraffes=123. The original code subtracted 2673 for
    the 'all' columns while dividing by 36.20, which contradicts that formula;
    the 'all' reference is now 3620 in both places.
    """
    if clfOrRgr == 'clf':
        algTypes = ['logistic', 'ada_boost']
    else:
        algTypes = ['linear', 'elastic_net']
    attribTypes = ['beauty']
    flNms = [alg + "_" + attrib for alg in algTypes for attrib in attribTypes]
    print(flNms)

    if thresholdMeth:
        suffix, hdr = "_thresholded.csv", "threshold"
    else:
        hdr = "num_images"
        suffix = "_kSharesRandom.csv" if randomShare else "_kSharesGGR.csv"
    print(suffix)

    def _load(flNm):
        # One result file with every column except the key tagged by its source.
        # index_col=0 / parse_dates=True are the defaults the removed
        # DataFrame.from_csv applied.
        frame = pd.read_csv("../FinalResults/" + flNm + suffix,
                            index_col=0, parse_dates=True).reset_index()
        frame.columns = [c if c == hdr else c + "_" + flNm for c in frame.columns]
        return frame

    df = _load(flNms[0])
    for flNm in flNms[1:]:
        df = df.merge(_load(flNm), on=hdr)
    df.index = df[hdr]
    df = df.drop(columns=[hdr])

    # Reference counts, checked in the same precedence as the original if/elif chain.
    actualCounts = (('all', 3620), ('zebras', 2673), ('giraffes', 123))
    for col in list(df.columns):
        for key, actual in actualCounts:
            if key in col:
                # % error = (predicted - actual) * 100 / actual
                df[col + '_err'] = (df[col] - actual) / (actual / 100.0)
                break

    errorCols = [col for col in df.columns if 'err' in col]
    df = df[errorCols]
    # Blank out errors of 100% or more without chained assignment on a slice.
    return df.mask(df.abs() >= 100)
In [64]:
# Error tables used by the plotting cells; reset_index() turns the key column
# (num_images / threshold) back into a regular column.
# NOTE(review): df_top is built from the classifier files ('clf'), so it holds
# logistic / ada_boost columns, whereas df_random and df_t hold linear /
# elastic_net columns. One plotting cell reads linear / elastic_net columns from
# df_top, which this call does not produce.
df_top = buildErrPlots('clf').reset_index()
df_random = buildErrPlots('rgr', randomShare=True).reset_index()
df_t = buildErrPlots('rgr',thresholdMeth=True).reset_index()
In [77]:
# NOTE(review): df_bottom is only ever assigned in the commented-out line below,
# yet two plotting cells read it.
# df_bottom = buildErrPlots('clf').reset_index()

# Hand-entered replacement values for the first rows of the threshold table.
# NOTE(review): these overwrite computed errors with literals of unknown origin,
# and re-running the cell re-applies them to whatever df_t currently holds.
d = df_t.to_dict(orient='records')
d[0].update(zebras_elastic_net_beauty_err=-44.5, zebras_linear_beauty_err=22)
d[1].update(zebras_elastic_net_beauty_err=-68, zebras_linear_beauty_err=34)
d[2].update(zebras_elastic_net_beauty_err=-52.9, zebras_linear_beauty_err=62)
d[3].update(zebras_linear_beauty_err=53)
df_t = pd.DataFrame(d)
df_t
Out[77]:
In [72]:
# Percent-error curves against K for the top / bottom / random selections.
# NOTE(review): the legend names and the title talk about regression, but the
# top and bottom traces read logistic / ada_boost columns, and two traces plot
# giraffe errors while the rest plot zebra errors - confirm this is intended.
# NOTE(review): df_bottom is not assigned anywhere in the visible notebook.
layout = go.Layout(
    title='Images scored using regression',
    titlefont={'size': 20},
    showlegend=True,
    legend={'x': 1, 'y': 1, 'font': {'size': 20}},
    xaxis={
        'title': 'K (number of images)',
        'titlefont': {'size': 20},
        'tickfont': {'size': 20},
        'ticklen': 5,
        'zeroline': True,
    },
    yaxis={
        'title': 'Percent Error',
        'titlefont': {'size': 20},
        'tickfont': {'size': 20},
        'ticklen': 5,
    },
)

trace1 = go.Scatter(name="Linear Regression - Top K", opacity=0.5,
                    x=list(df_top['num_images']),
                    y=list(df_top['giraffes_logistic_beauty_err']))
trace2 = go.Scatter(name="Elastic Net - Top K", opacity=1,
                    x=list(df_top['num_images']),
                    y=list(df_top['zebras_ada_boost_beauty_err']))
trace3 = go.Scatter(name="Linear Regression - Bottom K", opacity=0.5,
                    x=list(df_bottom['num_images']),
                    y=list(df_bottom['zebras_logistic_beauty_err']))
trace4 = go.Scatter(name="Elastic Net - Bottom K", opacity=1,
                    x=list(df_bottom['num_images']),
                    y=list(df_bottom['zebras_ada_boost_beauty_err']))
trace5 = go.Scatter(name="Linear Regression - Random K", opacity=0.5,
                    x=list(df_random['num_images']),
                    y=list(df_random['zebras_linear_beauty_err']))
trace6 = go.Scatter(name="Elastic Net - Random K", opacity=1,
                    x=list(df_random['num_images']),
                    y=list(df_random['giraffes_elastic_net_beauty_err']))

data = [trace1, trace2, trace3, trace4, trace5, trace6]
fig = {'data': data, 'layout': layout}
iplot(fig)
In [57]:
# Percent-error curves against K: linear vs elastic-net zebra errors for the
# top / bottom / random selections.
# NOTE(review): df_top is built with buildErrPlots('clf') in the visible notebook,
# which yields logistic / ada_boost columns, not the linear / elastic_net ones
# read here; df_bottom is not assigned anywhere in the visible notebook.
layout = go.Layout(
    title='Images scored using regressor',
    titlefont={'size': 20},
    showlegend=True,
    legend={'x': 1, 'y': 1, 'font': {'size': 20}},
    xaxis={
        'title': 'K (number of images)',
        'titlefont': {'size': 20},
        'tickfont': {'size': 20},
        'ticklen': 5,
        'zeroline': True,
    },
    yaxis={
        'title': 'Percent Error',
        'titlefont': {'size': 20},
        'tickfont': {'size': 20},
        'ticklen': 5,
    },
)

trace1 = go.Scatter(name="Linear Regression - Top K", opacity=0.5,
                    x=list(df_top['num_images']),
                    y=list(df_top['zebras_linear_beauty_err']))
trace2 = go.Scatter(name="Elastic Net - Top K", opacity=1,
                    x=list(df_top['num_images']),
                    y=list(df_top['zebras_elastic_net_beauty_err']))
trace3 = go.Scatter(name="Linear Regression - Bottom K", opacity=0.5,
                    x=list(df_bottom['num_images']),
                    y=list(df_bottom['zebras_linear_beauty_err']))
trace4 = go.Scatter(name="Elastic Net - Bottom K", opacity=1,
                    x=list(df_bottom['num_images']),
                    y=list(df_bottom['zebras_elastic_net_beauty_err']))
trace5 = go.Scatter(name="Linear Regression - Random K", opacity=0.5,
                    x=list(df_random['num_images']),
                    y=list(df_random['zebras_linear_beauty_err']))
trace6 = go.Scatter(name="Elastic Net - Random K", opacity=1,
                    x=list(df_random['num_images']),
                    y=list(df_random['zebras_elastic_net_beauty_err']))

data = [trace1, trace2, trace3, trace4, trace5, trace6]
fig = {'data': data, 'layout': layout}
iplot(fig)
In [54]:
# Parse "../data/ggr_uuid_annot_uuid_map.json" into `j`.
# NOTE(review): `j` is not referenced by any other cell in the visible notebook.
with open("../data/ggr_uuid_annot_uuid_map.json") as fl:
    j = json.loads(fl.read())
In [78]:
# Percent-error curves against the shareability-score threshold, linear vs
# elastic net, read from df_t.
# NOTE(review): df_t may contain the hand-entered override values applied in the
# cell that rebuilds it from records.
layout = go.Layout(
    title='Images scored using regressor',
    titlefont={'size': 20},
    showlegend=True,
    legend={'x': 1, 'y': 1, 'font': {'size': 20}},
    xaxis={
        'title': 'T (image shareability score threshold)',
        'titlefont': {'size': 20},
        'tickfont': {'size': 20},
        'ticklen': 5,
        'zeroline': True,
    },
    yaxis={
        'title': 'Percent Error',
        'titlefont': {'size': 20},
        'tickfont': {'size': 20},
        'ticklen': 5,
    },
)

trace1 = go.Scatter(name="Linear Regression", opacity=0.5,
                    x=list(df_t['threshold']),
                    y=list(df_t['zebras_linear_beauty_err']))
trace2 = go.Scatter(name="Elastic Net", opacity=1,
                    x=list(df_t['threshold']),
                    y=list(df_t['zebras_elastic_net_beauty_err']))

data = [trace1, trace2]
fig = {'data': data, 'layout': layout}
iplot(fig)
In [75]:
# Display the current threshold error table.
df_t
Out[75]:
In [ ]: