In [1]:

    
import cufflinks as cf # this is necessary to link pandas to plotly
cf.go_online()
import json
import plotly.graph_objs as go
import pandas as pd
import htmltag as HT
import PopulationEstimatorFromClf as PE
import importlib
importlib.reload(PE)
import random
import DataStructsHelperAPI as DS

Logic for creating the comma seperated aggregate data file

Not needed to run every time



In [ ]:

    
attribs = [ 'GID', 'AID', 'AGE',
       'EXEMPLAR_FLAG', 'INDIVIDUAL_NAME', 'NID', 'QUALITY', 'SEX', 'SPECIES',
       'VIEW_POINT','CONTRIBUTOR']

df = ImageMap.genGidAidFtrDf("../data/full_gid_aid_map.json","../data/full_aid_features.json",'../data/full_gid_aid_ftr.csv')
df_comb = ImageMap.createMstrFl("../data/full_gid_aid_ftr.csv","../data/GZC_data_tagged.json",attribs,"../data/full_gid_aid_ftr_agg.csv")

Visuals for accuracies of predictions



In [46]:

    
with open("../FinalResults/PopulationEstimate.json","r") as jsonFl:
    resObj = json.load(jsonFl)



In [47]:

    
df = pd.DataFrame(resObj)
df['Axes Name'] = df['Classifier'] + " " + df['Attribute']

df = df[['Axes Name', 'all','giraffes','zebras','shared_images_count']]
df['Error_total_pop'] = df['all'] - 3620
df['Error_zebra_pop'] = df['zebras'] - 3468
df['Error_giraffe_pop'] = df['giraffes'] - 177
df['Predicted_Shared_proportion'] = df['shared_images_count'] * 100 / 6523
dfFull = df[['Axes Name','all','Error_total_pop','zebras','Error_zebra_pop','giraffes','Error_giraffe_pop','shared_images_count','Predicted_Shared_proportion']]
dfFull['norm_error_total_pop'] = dfFull['Error_total_pop'] / 3620
dfFull['norm_error_zebra_pop'] = dfFull['Error_zebra_pop'] / 3468
dfFull['norm_error_giraffe_pop'] = dfFull['Error_giraffe_pop'] / 177
dfFull.head()









    



/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:10: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:11: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:12: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy







    Out[47]:






  
    
      
      Axes Name
      all
      Error_total_pop
      zebras
      Error_zebra_pop
      giraffes
      Error_giraffe_pop
      shared_images_count
      Predicted_Shared_proportion
      norm_error_total_pop
      norm_error_zebra_pop
      norm_error_giraffe_pop
    
  
  
    
      0
      bayesian sparse
      0.0
      -3620.0
      0.0
      -3468.0
      0.0
      -177.0
      10
      0.153304
      -1.000000
      -1.0
      -1.000000
    
    
      1
      bayesian non_sparse
      714.0
      -2906.0
      0.0
      -3468.0
      132.0
      -45.0
      233
      3.571976
      -0.802762
      -1.0
      -0.254237
    
    
      2
      bayesian non_zero
      0.0
      -3620.0
      0.0
      -3468.0
      0.0
      -177.0
      92
      1.410394
      -1.000000
      -1.0
      -1.000000
    
    
      3
      bayesian abv_mean
      0.0
      -3620.0
      0.0
      -3468.0
      0.0
      -177.0
      133
      2.038939
      -1.000000
      -1.0
      -1.000000
    
    
      4
      logistic sparse
      0.0
      -3620.0
      0.0
      -3468.0
      0.0
      -177.0
      8
      0.122643
      -1.000000
      -1.0
      -1.000000



In [48]:

    
dfErrors= dfFull[['Axes Name','Error_total_pop','Error_zebra_pop','Error_giraffe_pop']]
dfErrors.index = df['Axes Name']
dfErrors.drop(['Axes Name'],1,inplace=True)









    



/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [49]:

    
layout = go.Layout(
    title="Estimation absolute-errors using predict-shared data",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Classifier and Attribute Selection method",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Absolute Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    ))
fig1 = dfErrors.iplot(kind='bar',filename="Absolute_Errors",layout=layout)



In [50]:

    
dfNormErrors= dfFull[['Axes Name','norm_error_total_pop','norm_error_zebra_pop','norm_error_giraffe_pop']]
dfNormErrors.index = df['Axes Name']
dfNormErrors.drop(['Axes Name'],1,inplace=True)









    



/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [51]:

    
layout = go.Layout(
    title="Estimation normalized-errors using predict-shared data",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Classifier and Attribute Selection method",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Normalized Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    ))
fig2 = dfNormErrors.iplot(kind='bar',filename="Norm_Errors",layout=layout)
# Error = (predicted population - actual population)
# Normalized error formula =  Error / actual population



In [52]:

    
dfNoOutliers = dfErrors[(abs(dfErrors['Error_total_pop']) <= 2750 )][(abs(dfErrors['Error_total_pop']) > 10)]









    



/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: UserWarning:

Boolean Series key will be reindexed to match DataFrame index.



In [53]:

    
layout = go.Layout(
    title="Estimation errors using predict-shared data -no outliers",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Classifier and Attribute Selection method",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Absolute Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    ))
fig3 = dfNoOutliers.iplot(kind='bar',filename="errors_noOutliers",layout=layout)



In [54]:

    
# predicted shared proportion (x) vs normalized error zebra (y1) and giraffe (y2)? thanks!
dfNewPlot = dfFull[['Predicted_Shared_proportion','norm_error_zebra_pop','norm_error_giraffe_pop']]
dfNewPlot.index = dfNewPlot['Predicted_Shared_proportion']/100
dfNewPlot.drop(['Predicted_Shared_proportion'],1,inplace=True)
dfNewPlot.head()









    



/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy







    Out[54]:






  
    
      
      norm_error_zebra_pop
      norm_error_giraffe_pop
    
    
      Predicted_Shared_proportion
      
      
    
  
  
    
      0.001533
      -1.0
      -1.000000
    
    
      0.035720
      -1.0
      -0.254237
    
    
      0.014104
      -1.0
      -1.000000
    
    
      0.020389
      -1.0
      -1.000000
    
    
      0.001226
      -1.0
      -1.000000



In [55]:

    
layout = go.Layout(
    title="Predicted Shared Proportion versus Norm Error",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Predicted Share Proportion",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Normalized Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    )
    )
fig4 = dfNewPlot.iplot(kind='bar',filename="predictedSharedVsError",layout=layout)



In [58]:

    
fullFl = HT.HTML(HT.body(HT.h2("Population Estimates using predicted shared data - master table"),
                HT.HTML(dfFull.to_html(index=False)),
                HT.HTML(fig1.embed_code),
                HT.HTML(fig2.embed_code),
                HT.HTML(fig3.embed_code),
                HT.HTML(fig4.embed_code)
               ))


outputFile = open("../FinalResults/PopulationEstimationUsingClf.html","w")
outputFile.write(fullFl)
outputFile.close()

Synthetic Experiments

Synthetic Experiment #1

Calculate the population estimate

Synthetic Experiment #2

Calculate the population estimate



In [156]:

    
appearanceDays = {}
for card in sdCards.keys():
    pred_results = {gid : predResults[gid] for gid in sdCards[card] if gid != '3644'}
    dfPredRes = pd.DataFrame(pred_results,index=['share']).transpose().reset_index()
    dfPredRes.columns = ['GID','share']
    appearanceDays[card] = set(pd.DataFrame.merge(dfPredRes,dfGidDays,on='GID').to_dict()['day'].values())



In [157]:

    
appearanceDays









    Out[157]:





{'GIRM_MUGU_20,hyrule:joncrall:/media/raid/work/GIRM_MUGU_20,,,,,0': set(),
 "NNP GZC Car '10WHITE', Person 'A'": {1},
 "NNP GZC Car '11WHITE', Person 'A'": {1},
 "NNP GZC Car '12WHITE', Person 'A'": {1},
 "NNP GZC Car '13WHITE', Person 'A'": {1},
 "NNP GZC Car '13WHITE', Person 'B'": {1},
 "NNP GZC Car '14WHITE', Person 'A'": {1},
 "NNP GZC Car '15WHITE', Person 'A'": {1},
 "NNP GZC Car '15WHITE', Person 'B'": {1},
 "NNP GZC Car '15WHITE', Person 'C'": {1},
 "NNP GZC Car '15WHITE', Person 'D'": {1},
 "NNP GZC Car '16WHITE', Person 'A'": {1},
 "NNP GZC Car '17WHITE', Person 'A'": {1},
 "NNP GZC Car '17WHITE', Person 'C'": {1},
 "NNP GZC Car '1BLUE', Person 'A'": {1},
 "NNP GZC Car '1BLUE', Person 'B'": {1},
 "NNP GZC Car '1BLUE', Person 'C'": {1},
 "NNP GZC Car '1BLUE', Person 'D'": {1},
 "NNP GZC Car '1PURPLE', Person 'A'": {1},
 "NNP GZC Car '1PURPLE', Person 'B'": {1},
 "NNP GZC Car '1PURPLE', Person 'C'": set(),
 "NNP GZC Car '1PURPLE', Person 'D'": {1},
 "NNP GZC Car '1RED', Person 'A'": {2},
 "NNP GZC Car '1RED', Person 'B'": {2},
 "NNP GZC Car '1WHITE', Person 'A'": {1},
 "NNP GZC Car '1WHITE', Person 'B'": {1},
 "NNP GZC Car '1WHITE', Person 'C'": {1},
 "NNP GZC Car '25PURPLE', Person 'A'": set(),
 "NNP GZC Car '2RED', Person 'A'": {2},
 "NNP GZC Car '2RED', Person 'B'": {2},
 "NNP GZC Car '2RED', Person 'C'": {2},
 "NNP GZC Car '2RED', Person 'D'": {2},
 "NNP GZC Car '2RED', Person 'E'": {2},
 "NNP GZC Car '2WHITE', Person 'A'": {1},
 "NNP GZC Car '3PURPLE', Person 'A'": {1},
 "NNP GZC Car '3PURPLE', Person 'B'": {1},
 "NNP GZC Car '3RED', Person 'A'": {2},
 "NNP GZC Car '3RED', Person 'B'": {2},
 "NNP GZC Car '3WHITE', Person 'A'": {1},
 "NNP GZC Car '4RED', Person 'A'": {2},
 "NNP GZC Car '4RED', Person 'B'": {2},
 "NNP GZC Car '4WHITE', Person 'A'": {1},
 "NNP GZC Car '5RED', Person 'A'": {2},
 "NNP GZC Car '5RED', Person 'C'": {2},
 "NNP GZC Car '5WHITE', Person 'A'": {1},
 "NNP GZC Car '6RED', Person 'A'": {2},
 "NNP GZC Car '6RED', Person 'B'": {2},
 "NNP GZC Car '6WHITE', Person 'B'": {1},
 "NNP GZC Car '7WHITE', Person 'A'": {1},
 "NNP GZC Car '7WHITE', Person 'B'": {1},
 "NNP GZC Car '8WHITE', Person 'A'": {1},
 "NNP GZC Car '9WHITE', Person 'A'": {1},
 'NNP_Master,pachy.cs.uic.edu:jonc:/home/shared_ibeis/data/work/NNP_Master,,,,,6': set(),
 'PZ_MUGU_18,hyrule:joncrall:/media/raid/work/PZ_MUGU_18,,,,,0': set(),
 'PZ_MUGU_19,hyrule:joncrall:/media/raid/work/PZ_MUGU_19,,,,,0': set(),
 'PZ_MUGU_20,hyrule:joncrall:/media/raid/work/PZ_MUGU_20,,,,,0': set()}



In [31]:

    
l = PE.buildErrPlots('rgr')



In [34]:

    
for ifrm in l:
    print(ifrm)
    print("<p>X-axis : k <br>Y axis = Percentage Error</p>")
    print()









    



<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/268.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/270.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/272.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/274.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/276.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/278.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/280.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/282.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/284.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>



In [7]:

    
import GetPropertiesAPI as GP



In [28]:

    
GP.getImageFeature(GP.getAnnotID(2929),"bbox")









    Out[28]:





[[1362, 2142, 1414, 1371],
 [2828, 1894, 1105, 1131],
 [3642, 1722, 1405, 1337],
 [34, 2108, 1482, 1380]]



In [19]:

    
GP.getExifData(599,"gps")









    Out[19]:





[[-1.391097, 36.908959]]



In [27]:

    
GP.getAnnotID(2929)









    Out[27]:





[17266, 17267, 17268, 17269]



In [ ]:

	Axes Name	all	Error_total_pop	Error_zebra_pop	giraffes	Error_giraffe_pop	shared_images_count	Predicted_Shared_proportion	norm_error_total_pop	norm_error_zebra_pop	norm_error_giraffe_pop
0	bayesian sparse	0.0	-3620.0	-3468.0	0.0	-177.0	10	0.153304	-1.000000	-1.0	-1.000000
1	bayesian non_sparse	714.0	-2906.0	-3468.0	132.0	-45.0	233	3.571976	-0.802762	-1.0	-0.254237
2	bayesian non_zero	0.0	-3620.0	-3468.0	0.0	-177.0	92	1.410394	-1.000000	-1.0	-1.000000
3	bayesian abv_mean	0.0	-3620.0	-3468.0	0.0	-177.0	133	2.038939	-1.000000	-1.0	-1.000000
4	logistic sparse	0.0	-3620.0	-3468.0	0.0	-177.0	8	0.122643	-1.000000	-1.0	-1.000000

	norm_error_zebra_pop	norm_error_giraffe_pop
Predicted_Shared_proportion
0.001533	-1.0	-1.000000
0.035720	-1.0	-0.254237
0.014104	-1.0	-1.000000
0.020389	-1.0	-1.000000
0.001226	-1.0	-1.000000

Logic for creating the comma seperated aggregate data file

Visuals for accuracies of predictions

Synthetic Experiments

Synthetic Experiment #1

Assign a score to each image (here probability) and select the top 'k' images for each contributor and share them

Calculate the population estimate

Synthetic Experiment #2

Assign a score to each image (here probability) and select the top 'x' images for each contributor where x is a random number and share them

Calculate the population estimate