In [1]:
# Notebook-wide imports and one-time setup.
import cufflinks as cf # links pandas to plotly; the DataFrame.iplot() calls below rely on it
# Put cufflinks in online mode before any plot is produced.
cf.go_online()
import json
import plotly.graph_objs as go
import pandas as pd
import htmltag as HT
import PopulationEstimatorFromClf as PE
import importlib
# Re-import PE so that edits made to the module on disk are picked up
# without restarting the kernel.
importlib.reload(PE)
# NOTE(review): `random` and `DS` are not referenced in the visible cells —
# confirm they are still needed.
import random
import DataStructsHelperAPI as DS
In [ ]:
# Attribute names handed to ImageMap.createMstrFl below.
# NOTE(review): `ImageMap` is not imported in the visible import cell and this
# cell has no execution count — confirm it is still meant to be run.
attribs = [
    'GID',
    'AID',
    'AGE',
    'EXEMPLAR_FLAG',
    'INDIVIDUAL_NAME',
    'NID',
    'QUALITY',
    'SEX',
    'SPECIES',
    'VIEW_POINT',
    'CONTRIBUTOR',
]

# The CSV path passed last here is the same path createMstrFl receives first.
df = ImageMap.genGidAidFtrDf(
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    '../data/full_gid_aid_ftr.csv',
)
df_comb = ImageMap.createMstrFl(
    "../data/full_gid_aid_ftr.csv",
    "../data/GZC_data_tagged.json",
    attribs,
    "../data/full_gid_aid_ftr_agg.csv",
)
In [46]:
# Read the population-estimate results from disk into `resObj`.
# Text read mode is open()'s default, so no mode argument is given.
with open("../FinalResults/PopulationEstimate.json") as jsonFl:
    resObj = json.load(jsonFl)
In [47]:
# Reference values the estimates are compared against. Errors below are
# computed as (predicted population - actual population).
# NOTE(review): zebra + giraffe (3468 + 177 = 3645) does not equal the total
# (3620) — confirm the ground-truth figures.
ACTUAL_TOTAL_POP = 3620
ACTUAL_ZEBRA_POP = 3468
ACTUAL_GIRAFFE_POP = 177
# Denominator for the shared-image percentage; presumably the total number of
# images in the data set — TODO confirm.
TOTAL_IMAGE_COUNT = 6523

df = pd.DataFrame(resObj)
# One label per row: "<Classifier> <Attribute>"; later cells use it as the
# plot index.
df['Axes Name'] = df['Classifier'] + " " + df['Attribute']

# .copy() makes the column subset an independent frame, so the assignments
# below do not trigger SettingWithCopyWarning.
df = df[['Axes Name', 'all', 'giraffes', 'zebras', 'shared_images_count']].copy()
df['Error_total_pop'] = df['all'] - ACTUAL_TOTAL_POP
df['Error_zebra_pop'] = df['zebras'] - ACTUAL_ZEBRA_POP
df['Error_giraffe_pop'] = df['giraffes'] - ACTUAL_GIRAFFE_POP
df['Predicted_Shared_proportion'] = df['shared_images_count'] * 100 / TOTAL_IMAGE_COUNT

# Master table: predictions, absolute errors and normalized errors
# (normalized error = error / actual population).
dfFull = df[[
    'Axes Name',
    'all', 'Error_total_pop',
    'zebras', 'Error_zebra_pop',
    'giraffes', 'Error_giraffe_pop',
    'shared_images_count', 'Predicted_Shared_proportion',
]].copy()
dfFull['norm_error_total_pop'] = dfFull['Error_total_pop'] / ACTUAL_TOTAL_POP
dfFull['norm_error_zebra_pop'] = dfFull['Error_zebra_pop'] / ACTUAL_ZEBRA_POP
dfFull['norm_error_giraffe_pop'] = dfFull['Error_giraffe_pop'] / ACTUAL_GIRAFFE_POP
dfFull.head()
Out[47]:
In [48]:
# Absolute-error table for plotting, indexed by the 'Axes Name' label.
# set_index() moves the label column into the index in one step and returns a
# new frame, replacing the in-place drop with a positional `axis` argument
# (which pandas 2.0 no longer accepts).
dfErrors = dfFull.set_index('Axes Name')[
    ['Error_total_pop', 'Error_zebra_pop', 'Error_giraffe_pop']
]
In [49]:
# Bar chart of the absolute errors, one group of bars per 'Axes Name' label.
layout = go.Layout(
    title="Estimation absolute-errors using predict-shared data",
    titlefont={'size': 22},
    xaxis={
        'title': "Classifier and Attribute Selection method",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickangle': 35,  # slanted tick labels
        'tickfont': {'size': 9, 'color': 'black'},
    },
    yaxis={
        'title': "Absolute Error",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickfont': {'size': 9, 'color': 'black'},
    },
)
# fig1 is kept because the report cell reads fig1.embed_code.
fig1 = dfErrors.iplot(kind='bar', filename="Absolute_Errors", layout=layout)
In [50]:
# Normalized-error table for plotting, indexed by the 'Axes Name' label.
# set_index() moves the label column into the index in one step and returns a
# new frame, replacing the in-place drop with a positional `axis` argument
# (which pandas 2.0 no longer accepts).
dfNormErrors = dfFull.set_index('Axes Name')[
    ['norm_error_total_pop', 'norm_error_zebra_pop', 'norm_error_giraffe_pop']
]
In [51]:
# Bar chart of the normalized errors, one group of bars per 'Axes Name' label.
#   error            = predicted population - actual population
#   normalized error = error / actual population
layout = go.Layout(
    title="Estimation normalized-errors using predict-shared data",
    titlefont={'size': 22},
    xaxis={
        'title': "Classifier and Attribute Selection method",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickangle': 35,  # slanted tick labels
        'tickfont': {'size': 9, 'color': 'black'},
    },
    yaxis={
        'title': "Normalized Error",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickfont': {'size': 9, 'color': 'black'},
    },
)
# fig2 is kept because the report cell reads fig2.embed_code.
fig2 = dfNormErrors.iplot(kind='bar', filename="Norm_Errors", layout=layout)
In [52]:
# Keep only rows whose absolute total-population error lies in (10, 2750].
# Both bounds are combined into one mask: indexing twice (df[m1][m2]) applies
# a full-length mask to an already-filtered frame, which makes pandas reindex
# the mask with a warning and fails outright when index labels repeat.
absTotalErr = dfErrors['Error_total_pop'].abs()
dfNoOutliers = dfErrors[(absTotalErr > 10) & (absTotalErr <= 2750)]
In [53]:
# Bar chart of the absolute errors restricted to the rows in dfNoOutliers.
layout = go.Layout(
    title="Estimation errors using predict-shared data -no outliers",
    titlefont={'size': 22},
    xaxis={
        'title': "Classifier and Attribute Selection method",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickangle': 35,  # slanted tick labels
        'tickfont': {'size': 9, 'color': 'black'},
    },
    yaxis={
        'title': "Absolute Error",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickfont': {'size': 9, 'color': 'black'},
    },
)
# fig3 is kept because the report cell reads fig3.embed_code.
fig3 = dfNoOutliers.iplot(kind='bar', filename="errors_noOutliers", layout=layout)
In [54]:
# Normalized zebra and giraffe errors (y) against the predicted shared
# proportion (x). 'Predicted_Shared_proportion' is stored as a percentage, so
# it is divided by 100 to index the rows by a fraction.
# Selecting only the two error columns and copying avoids the in-place drop
# with a positional `axis` argument (which pandas 2.0 no longer accepts).
dfNewPlot = dfFull[['norm_error_zebra_pop', 'norm_error_giraffe_pop']].copy()
dfNewPlot.index = dfFull['Predicted_Shared_proportion'] / 100
dfNewPlot.head()
Out[54]:
In [55]:
# Bar chart of the normalized zebra/giraffe errors, with the predicted shared
# proportion (the index of dfNewPlot) on the x axis.
layout = go.Layout(
    title="Predicted Shared Proportion versus Norm Error",
    titlefont={'size': 22},
    xaxis={
        'title': "Predicted Share Proportion",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickangle': 35,  # slanted tick labels
        'tickfont': {'size': 9, 'color': 'black'},
    },
    yaxis={
        'title': "Normalized Error",
        'titlefont': {'size': 15},
        'showticklabels': True,
        'tickfont': {'size': 9, 'color': 'black'},
    },
)
# fig4 is kept because the report cell reads fig4.embed_code.
fig4 = dfNewPlot.iplot(kind='bar', filename="predictedSharedVsError", layout=layout)
In [58]:
# Assemble the HTML report: a heading, the master table (without its index
# column) and the embed code of the four plots, in order.
fullFl = HT.HTML(HT.body(
    HT.h2("Population Estimates using predicted shared data - master table"),
    HT.HTML(dfFull.to_html(index=False)),
    HT.HTML(fig1.embed_code),
    HT.HTML(fig2.embed_code),
    HT.HTML(fig3.embed_code),
    HT.HTML(fig4.embed_code),
))

# The context manager closes the file even if the write raises.
with open("../FinalResults/PopulationEstimationUsingClf.html", "w") as outputFile:
    outputFile.write(fullFl)
In [156]:
# For every card in sdCards, collect the set of 'day' values of its GIDs.
# NOTE(review): `sdCards`, `predResults` and `dfGidDays` are not defined in
# any visible cell — confirm where they come from.
appearanceDays = {}
for card in sdCards.keys():
    # GID '3644' is skipped; the reason is not visible here — TODO confirm.
    pred_results = {
        gid: predResults[gid]
        for gid in sdCards[card]
        if gid != '3644'
    }

    # Two-column frame (GID, share) built from the {gid: value} mapping.
    dfPredRes = (
        pd.DataFrame(pred_results, index=['share'])
        .transpose()
        .reset_index()
    )
    dfPredRes.columns = ['GID', 'share']

    # Join on GID and keep the distinct 'day' values for this card.
    mergedDays = dfPredRes.merge(dfGidDays, on='GID')
    appearanceDays[card] = set(mergedDays.to_dict()['day'].values())
In [157]:
# Display the mapping of card -> set of 'day' values.
appearanceDays
Out[157]:
In [31]:
# Store the result of PE.buildErrPlots('rgr'); a later cell iterates over it
# and prints each element.
# NOTE(review): `l` is easy to misread as `1`; consider a descriptive name.
l = PE.buildErrPlots('rgr')
In [34]:
# Print every element of `l`, together with an HTML caption describing the
# plot axes and a blank line.
# NOTE(review): indentation was lost in this export. The first print must be
# in the loop body; confirm whether the caption and the blank line are
# printed once per element or once after the loop.
for ifrm in l:
print(ifrm)
print("<p>X-axis : k <br>Y axis = Percentage Error</p>")
print()
In [7]:
import GetPropertiesAPI as GP
In [28]:
# Ad-hoc probe: pass the result of GP.getAnnotID(2929) to GP.getImageFeature
# with the feature name "bbox" and display what comes back.
GP.getImageFeature(GP.getAnnotID(2929),"bbox")
Out[28]:
In [19]:
# Ad-hoc probe: display the result of GP.getExifData for 599 and "gps".
GP.getExifData(599,"gps")
Out[19]:
In [27]:
# Ad-hoc probe: display the result of GP.getAnnotID for 2929.
GP.getAnnotID(2929)
Out[27]:
In [ ]: