In [1]:
import cufflinks as cf # this is necessary to link pandas to plotly
cf.go_online()
import json
import plotly.graph_objs as go
import pandas as pd
import htmltag as HT
import PopulationEstimatorFromClf as PE
import importlib
importlib.reload(PE)
import random
import DataStructsHelperAPI as DS


Logic for creating the comma seperated aggregate data file

Not needed to run every time


In [ ]:
attribs = [ 'GID', 'AID', 'AGE',
       'EXEMPLAR_FLAG', 'INDIVIDUAL_NAME', 'NID', 'QUALITY', 'SEX', 'SPECIES',
       'VIEW_POINT','CONTRIBUTOR']

df = ImageMap.genGidAidFtrDf("../data/full_gid_aid_map.json","../data/full_aid_features.json",'../data/full_gid_aid_ftr.csv')
df_comb = ImageMap.createMstrFl("../data/full_gid_aid_ftr.csv","../data/GZC_data_tagged.json",attribs,"../data/full_gid_aid_ftr_agg.csv")

Visuals for accuracies of predictions


In [46]:
with open("../FinalResults/PopulationEstimate.json","r") as jsonFl:
    resObj = json.load(jsonFl)

In [47]:
df = pd.DataFrame(resObj)
df['Axes Name'] = df['Classifier'] + " " + df['Attribute']

df = df[['Axes Name', 'all','giraffes','zebras','shared_images_count']]
df['Error_total_pop'] = df['all'] - 3620
df['Error_zebra_pop'] = df['zebras'] - 3468
df['Error_giraffe_pop'] = df['giraffes'] - 177
df['Predicted_Shared_proportion'] = df['shared_images_count'] * 100 / 6523
dfFull = df[['Axes Name','all','Error_total_pop','zebras','Error_zebra_pop','giraffes','Error_giraffe_pop','shared_images_count','Predicted_Shared_proportion']]
dfFull['norm_error_total_pop'] = dfFull['Error_total_pop'] / 3620
dfFull['norm_error_zebra_pop'] = dfFull['Error_zebra_pop'] / 3468
dfFull['norm_error_giraffe_pop'] = dfFull['Error_giraffe_pop'] / 177
dfFull.head()


/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:10: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:11: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:12: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[47]:
Axes Name all Error_total_pop zebras Error_zebra_pop giraffes Error_giraffe_pop shared_images_count Predicted_Shared_proportion norm_error_total_pop norm_error_zebra_pop norm_error_giraffe_pop
0 bayesian sparse 0.0 -3620.0 0.0 -3468.0 0.0 -177.0 10 0.153304 -1.000000 -1.0 -1.000000
1 bayesian non_sparse 714.0 -2906.0 0.0 -3468.0 132.0 -45.0 233 3.571976 -0.802762 -1.0 -0.254237
2 bayesian non_zero 0.0 -3620.0 0.0 -3468.0 0.0 -177.0 92 1.410394 -1.000000 -1.0 -1.000000
3 bayesian abv_mean 0.0 -3620.0 0.0 -3468.0 0.0 -177.0 133 2.038939 -1.000000 -1.0 -1.000000
4 logistic sparse 0.0 -3620.0 0.0 -3468.0 0.0 -177.0 8 0.122643 -1.000000 -1.0 -1.000000

In [48]:
dfErrors= dfFull[['Axes Name','Error_total_pop','Error_zebra_pop','Error_giraffe_pop']]
dfErrors.index = df['Axes Name']
dfErrors.drop(['Axes Name'],1,inplace=True)


/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [49]:
layout = go.Layout(
    title="Estimation absolute-errors using predict-shared data",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Classifier and Attribute Selection method",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Absolute Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    ))
fig1 = dfErrors.iplot(kind='bar',filename="Absolute_Errors",layout=layout)

In [50]:
dfNormErrors= dfFull[['Axes Name','norm_error_total_pop','norm_error_zebra_pop','norm_error_giraffe_pop']]
dfNormErrors.index = df['Axes Name']
dfNormErrors.drop(['Axes Name'],1,inplace=True)


/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [51]:
layout = go.Layout(
    title="Estimation normalized-errors using predict-shared data",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Classifier and Attribute Selection method",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Normalized Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    ))
fig2 = dfNormErrors.iplot(kind='bar',filename="Norm_Errors",layout=layout)
# Error = (predicted population - actual population)
# Normalized error formula =  Error / actual population

In [52]:
dfNoOutliers = dfErrors[(abs(dfErrors['Error_total_pop']) <= 2750 )][(abs(dfErrors['Error_total_pop']) > 10)]


/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: UserWarning:

Boolean Series key will be reindexed to match DataFrame index.


In [53]:
layout = go.Layout(
    title="Estimation errors using predict-shared data -no outliers",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Classifier and Attribute Selection method",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Absolute Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    ))
fig3 = dfNoOutliers.iplot(kind='bar',filename="errors_noOutliers",layout=layout)

In [54]:
# predicted shared proportion (x) vs normalized error zebra (y1) and giraffe (y2)? thanks!
dfNewPlot = dfFull[['Predicted_Shared_proportion','norm_error_zebra_pop','norm_error_giraffe_pop']]
dfNewPlot.index = dfNewPlot['Predicted_Shared_proportion']/100
dfNewPlot.drop(['Predicted_Shared_proportion'],1,inplace=True)
dfNewPlot.head()


/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[54]:
norm_error_zebra_pop norm_error_giraffe_pop
Predicted_Shared_proportion
0.001533 -1.0 -1.000000
0.035720 -1.0 -0.254237
0.014104 -1.0 -1.000000
0.020389 -1.0 -1.000000
0.001226 -1.0 -1.000000

In [55]:
layout = go.Layout(
    title="Predicted Shared Proportion versus Norm Error",
    titlefont = dict(
            size=22),
    xaxis=dict(
        title="Predicted Share Proportion",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickangle=35,
        tickfont=dict(
            size=9,
            color='black')
    ),
    yaxis=dict(
        title="Normalized Error",
        titlefont = dict(
            size=15),
        showticklabels=True,
        tickfont=dict(
            size=9,
            color='black')
    )
    )
fig4 = dfNewPlot.iplot(kind='bar',filename="predictedSharedVsError",layout=layout)

In [58]:
fullFl = HT.HTML(HT.body(HT.h2("Population Estimates using predicted shared data - master table"),
                HT.HTML(dfFull.to_html(index=False)),
                HT.HTML(fig1.embed_code),
                HT.HTML(fig2.embed_code),
                HT.HTML(fig3.embed_code),
                HT.HTML(fig4.embed_code)
               ))


outputFile = open("../FinalResults/PopulationEstimationUsingClf.html","w")
outputFile.write(fullFl)
outputFile.close()

Synthetic Experiments

Synthetic Experiment #1

Assign a score to each image (here probability) and select the top 'k' images for each contributor and share them

Calculate the population estimate

Synthetic Experiment #2

Assign a score to each image (here probability) and select the top 'x' images for each contributor where x is a random number and share them

Calculate the population estimate


In [156]:
appearanceDays = {}
for card in sdCards.keys():
    pred_results = {gid : predResults[gid] for gid in sdCards[card] if gid != '3644'}
    dfPredRes = pd.DataFrame(pred_results,index=['share']).transpose().reset_index()
    dfPredRes.columns = ['GID','share']
    appearanceDays[card] = set(pd.DataFrame.merge(dfPredRes,dfGidDays,on='GID').to_dict()['day'].values())

In [157]:
appearanceDays


Out[157]:
{'GIRM_MUGU_20,hyrule:joncrall:/media/raid/work/GIRM_MUGU_20,,,,,0': set(),
 "NNP GZC Car '10WHITE', Person 'A'": {1},
 "NNP GZC Car '11WHITE', Person 'A'": {1},
 "NNP GZC Car '12WHITE', Person 'A'": {1},
 "NNP GZC Car '13WHITE', Person 'A'": {1},
 "NNP GZC Car '13WHITE', Person 'B'": {1},
 "NNP GZC Car '14WHITE', Person 'A'": {1},
 "NNP GZC Car '15WHITE', Person 'A'": {1},
 "NNP GZC Car '15WHITE', Person 'B'": {1},
 "NNP GZC Car '15WHITE', Person 'C'": {1},
 "NNP GZC Car '15WHITE', Person 'D'": {1},
 "NNP GZC Car '16WHITE', Person 'A'": {1},
 "NNP GZC Car '17WHITE', Person 'A'": {1},
 "NNP GZC Car '17WHITE', Person 'C'": {1},
 "NNP GZC Car '1BLUE', Person 'A'": {1},
 "NNP GZC Car '1BLUE', Person 'B'": {1},
 "NNP GZC Car '1BLUE', Person 'C'": {1},
 "NNP GZC Car '1BLUE', Person 'D'": {1},
 "NNP GZC Car '1PURPLE', Person 'A'": {1},
 "NNP GZC Car '1PURPLE', Person 'B'": {1},
 "NNP GZC Car '1PURPLE', Person 'C'": set(),
 "NNP GZC Car '1PURPLE', Person 'D'": {1},
 "NNP GZC Car '1RED', Person 'A'": {2},
 "NNP GZC Car '1RED', Person 'B'": {2},
 "NNP GZC Car '1WHITE', Person 'A'": {1},
 "NNP GZC Car '1WHITE', Person 'B'": {1},
 "NNP GZC Car '1WHITE', Person 'C'": {1},
 "NNP GZC Car '25PURPLE', Person 'A'": set(),
 "NNP GZC Car '2RED', Person 'A'": {2},
 "NNP GZC Car '2RED', Person 'B'": {2},
 "NNP GZC Car '2RED', Person 'C'": {2},
 "NNP GZC Car '2RED', Person 'D'": {2},
 "NNP GZC Car '2RED', Person 'E'": {2},
 "NNP GZC Car '2WHITE', Person 'A'": {1},
 "NNP GZC Car '3PURPLE', Person 'A'": {1},
 "NNP GZC Car '3PURPLE', Person 'B'": {1},
 "NNP GZC Car '3RED', Person 'A'": {2},
 "NNP GZC Car '3RED', Person 'B'": {2},
 "NNP GZC Car '3WHITE', Person 'A'": {1},
 "NNP GZC Car '4RED', Person 'A'": {2},
 "NNP GZC Car '4RED', Person 'B'": {2},
 "NNP GZC Car '4WHITE', Person 'A'": {1},
 "NNP GZC Car '5RED', Person 'A'": {2},
 "NNP GZC Car '5RED', Person 'C'": {2},
 "NNP GZC Car '5WHITE', Person 'A'": {1},
 "NNP GZC Car '6RED', Person 'A'": {2},
 "NNP GZC Car '6RED', Person 'B'": {2},
 "NNP GZC Car '6WHITE', Person 'B'": {1},
 "NNP GZC Car '7WHITE', Person 'A'": {1},
 "NNP GZC Car '7WHITE', Person 'B'": {1},
 "NNP GZC Car '8WHITE', Person 'A'": {1},
 "NNP GZC Car '9WHITE', Person 'A'": {1},
 'NNP_Master,pachy.cs.uic.edu:jonc:/home/shared_ibeis/data/work/NNP_Master,,,,,6': set(),
 'PZ_MUGU_18,hyrule:joncrall:/media/raid/work/PZ_MUGU_18,,,,,0': set(),
 'PZ_MUGU_19,hyrule:joncrall:/media/raid/work/PZ_MUGU_19,,,,,0': set(),
 'PZ_MUGU_20,hyrule:joncrall:/media/raid/work/PZ_MUGU_20,,,,,0': set()}

In [31]:
l = PE.buildErrPlots('rgr')

In [34]:
for ifrm in l:
    print(ifrm)
    print("<p>X-axis : k <br>Y axis = Percentage Error</p>")
    print()


<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/268.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/270.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/272.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/274.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/276.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/278.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/280.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/282.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreejith_1729/284.embed" height="525px" width="100%"></iframe>
<p>X-axis : k <br>Y axis = Percentage Error</p>


In [7]:
import GetPropertiesAPI as GP

In [28]:
GP.getImageFeature(GP.getAnnotID(2929),"bbox")


Out[28]:
[[1362, 2142, 1414, 1371],
 [2828, 1894, 1105, 1131],
 [3642, 1722, 1405, 1337],
 [34, 2108, 1482, 1380]]

In [19]:
GP.getExifData(599,"gps")


Out[19]:
[[-1.391097, 36.908959]]

In [27]:
GP.getAnnotID(2929)


Out[27]:
[17266, 17267, 17268, 17269]

In [ ]: