In [1]:
import importlib
import ClassiferHelperAPI
importlib.reload(ClassiferHelperAPI)
# re-import after the reload so the names point at the freshly reloaded functions
from ClassiferHelperAPI import trainTestClf, trainTestRgrs
import numpy as np
import json
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from plotly import tools
import plotly.plotly as py
import cufflinks as cf
cf.go_online()


Original set of features:

arousal, contrast, dominance, height, hsv_itten_std_h, hsv_itten_std_s, hsv_itten_std_v, lat, long, orientation, pleasure, symmetry, width, day, hour, numAnimals, zebra


In [134]:
clfArgs = {'dummy' : {'strategy' : 'most_frequent'},
           'bayesian' : {'fit_prior' : True},
           'logistic' : {'penalty' : 'l2'},
           'svm' : {'kernel' : 'rbf', 'probability' : True},
           'dtree' : {'criterion' : 'entropy'},
           'random_forests' : {'n_estimators' : 10},
           'ada_boost' : {'n_estimators' : 50}}

regrArgs = {'linear' : {'fit_intercept' : True},
            'ridge' : {'fit_intercept' : True},
            'lasso' : {'fit_intercept' : True},
            'elastic_net' : {'fit_intercept' : True},
            'svr' : {'fit_intercept' : True},
            'dtree_regressor' : {'fit_intercept' : True}}

ori_train_fl = "../data/BeautyFtrVector_GZC_Expt2.csv"   # GZC Experiment 2 beauty-feature vectors (training)
ori_test_fl = "../data/Flickr_Scrapes_Ftrs.csv"          # features for the scraped Flickr images (test)

train_df = pd.DataFrame.from_csv(ori_train_fl)
test_df = pd.DataFrame.from_csv(ori_test_fl)

# Drop the location and day-of-capture features before training
train_df.drop(['lat', 'long', 'day'], 1, inplace=True)
test_df.drop(['lat', 'long', 'day'], 1, inplace=True)

train_fl = "/tmp/training_fl.csv"
test_fl = "/tmp/test_fl.csv"

train_df.to_csv(train_fl)
test_df.to_csv(test_fl)

In [135]:
with open("../data/Flickr_FL_URL_map.json", "r") as fl_url_map_fl:
    fl_url_map = json.load(fl_url_map_fl)

rgrTypes = ['linear', 'ridge', 'lasso', 'elastic_net', 'svr', 'dtree_regressor']
clfTypes = ['bayesian', 'logistic', 'svm', 'dtree', 'random_forests', 'ada_boost']
attrib = 'beauty'

Results using Regression

  • Build a dictionary of test files and their respective predictions
  • Build a report with all features, the predicted share rate, and the actual image (see the sketch after the regression plots below)
  • Plot: cluster of predicted share rates

In [140]:
results = {}
# Train each regressor on the GZC training data and predict share rates for the Flickr test images
for meth in rgrTypes:
    methObj, predResults = trainTestRgrs(train_fl,
                                         test_fl,
                                         meth,
                                         attrib,
                                         infoGainFl=None,
                                         methArgs=regrArgs)
    results[meth] = dict(obj=methObj, pred_results=predResults)

# One scatter plot per regression method: predicted share rate for every test image
for meth in rgrTypes:
    pred_results = results[meth]['pred_results']
    y = list(pred_results.values())
    x = list(range(1, len(y) + 1))
    
    layout= go.Layout(
                title= "Share rate distributions using %s" %meth,
                showlegend=False,
                xaxis= dict(
                    title= 'Images (n)',
                    ticklen= 5,
                    zeroline= True,
                    gridwidth= 2
                ),
                yaxis=dict(
                    title= 'Predicted Share rates',
                    ticklen= 5,
                    gridwidth= 2,
                    #range=range
                )
            )

    trace1 = go.Scatter(
                    x = x,
                    y = y,
                    mode = 'markers'
            )


    data = [trace1]

    fig = dict(data=data,layout=layout)
    figmain= py.iplot(fig,filename= 'Visual for distribution of predicted share rates using %s' %meth)
    print(figmain.embed_code)


Number of outliers identified: 1
1606 1606
Number of outliers identified: 0
1607 1607
Number of outliers identified: 0
1607 1607
Number of outliers identified: 0
1607 1607
Number of outliers identified: 0
1607 1607
Number of outliers identified: 0
1607 1607
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/615.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/617.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/619.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/621.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/623.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/625.embed" height="525px" width="100%"></iframe>

Results using Classifiers


In [141]:
results = {}
# Train each classifier on the GZC training data and predict share/no-share for the Flickr test images
for meth in clfTypes:
    methObj, predResults = trainTestClf(train_fl,
                                        test_fl,
                                        meth,
                                        attrib,
                                        infoGainFl=None,
                                        methArgs=clfArgs)
    results[meth] = dict(obj=methObj, pred_results=predResults)

# One scatter plot per classifier: predicted share probability for every test image
for meth in clfTypes:
    pred_results = results[meth]['obj'].predProbabs
    y = list(pred_results)
    x = list(range(1, len(y) + 1))
    
    layout= go.Layout(
                title= "Prediction probability distributions using %s" %meth,
                showlegend=False,
                xaxis= dict(
                    title= 'Images (n)',
                    ticklen= 5,
                    zeroline= True,
                    gridwidth= 2
                ),
                yaxis=dict(
                    title= 'Predicted Share/No-Share probabilities',
                    ticklen= 5,
                    gridwidth= 2,
                    #range=range
                )
            )

    trace1 = go.Scatter(
                    x = x,
                    y = y,
                    mode = 'markers'
            )


    data = [trace1]

    fig = dict(data=data,layout=layout)
    figmain = py.iplot(fig,filename= 'Visual for distribution of predicted share no-share probabilities using %s' %meth)
    print(figmain.embed_code)


<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/628.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/630.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/632.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/634.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/636.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~smenon8/638.embed" height="525px" width="100%"></iframe>

Input Distribution


In [ ]:
# Note: this expects the regression `results` built in In [140]; In [141] overwrites
# `results` with the classifier objects, so re-run the regression cell before this one.
pred_results = results['linear']['obj'].train_y
y = list(pred_results)
x = list(range(1, len(y) + 1))
    
layout= go.Layout(
                title= "Training data distribution",
                showlegend=False,
                xaxis= dict(
                    title= 'Images (n)',
                    ticklen= 5,
                    zeroline= True,
                    gridwidth= 2
                ),
                yaxis=dict(
                    title= 'Share proportion',
                    ticklen= 5,
                    gridwidth= 2,
                    #range=range
                )
            )

trace1 = go.Scatter(
                    x = x,
                    y = y,
                    mode = 'markers'
            )

data = [trace1]
fig = dict(data=data,layout=layout)
fig = py.iplot(fig,filename="Expt2 Training data distributions")
fig.embed_code

Combine species data


In [1]:
import JobsMapResultsFilesToContainerObjs as ImgMap
import re

In [22]:
Flickr_gid_species_map = ImgMap.extractImageFeaturesFromMap("../data/Flickr_IBEIS_Ftrs_gid_aid_map.json",
                                  "../data/Flickr_IBEIS_Ftrs_aid_features.json",
                                  "SPECIES")

Expt2_gid_species_map = ImgMap.extractImageFeaturesFromMap("../data/experiment2_gid_aid_map.json",
                                  "../data/experiment2_aid_features.json",
                                  "SPECIES")

In [32]:
with open("../data/flickr_imgs_gid_flnm_map.json") as flID_map_fl_obj:
    gid_flID_map = json.load(flID_map_fl_obj)
Flickr_imgID_species_map = {re.findall(r'([0-9]*)_.*',gid_flID_map[gid])[0]: Flickr_gid_species_map[gid] for gid in Flickr_gid_species_map.keys()}

In [34]:
flID_newFtrs = {}

# Two new features per Flickr image: whether it contains a zebra, and how many animals
# are annotated when it does (see the helper sketch below)
for flID in Flickr_imgID_species_map.keys():
    hasZebra = 1 if any("zebra" in s for s in Flickr_imgID_species_map[flID]) else 0
    numAnimals = len(Flickr_imgID_species_map[flID]) if hasZebra == 1 else 0

    flID_newFtrs[flID] = dict(zebra=hasZebra, numAnimals=numAnimals)
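
The same zebra/animal-count derivation is repeated for the Expt2 and full GZC species maps in the cells below, so it could be factored into a small helper, sketched here:

# Hedged sketch: the repeated species-to-features derivation as one helper
def species_to_ftrs(species_list):
    has_zebra = 1 if any("zebra" in s for s in species_list) else 0
    num_animals = len(species_list) if has_zebra == 1 else 0
    return dict(zebra=has_zebra, numAnimals=num_animals)

# e.g. flID_newFtrs = {flID: species_to_ftrs(spcs) for flID, spcs in Flickr_imgID_species_map.items()}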

In [37]:
gid_newFtrs = {}
# Same derivation for the Expt2 (GZC) images, keyed by gid
for gid in Expt2_gid_species_map.keys():
    hasZebra = 1 if any("zebra" in s for s in Expt2_gid_species_map[gid]) else 0
    numAnimals = len(Expt2_gid_species_map[gid]) if hasZebra == 1 else 0

    gid_newFtrs[gid] = dict(zebra=hasZebra, numAnimals=numAnimals)

In [138]:
df_expt2_new = pd.DataFrame.from_dict(gid_newFtrs).transpose()
df_expt2_new.reset_index(inplace=True)

df_flickr_new = pd.DataFrame.from_dict(flID_newFtrs).transpose()
df_flickr_new.reset_index(inplace=True)

In [131]:
FlickrDf = pd.DataFrame.from_csv(ori_test_fl)
# rotation_to_orientation is assumed to be defined earlier in the notebook; it converts
# the raw rotation value into the orientation feature used by the models
FlickrDf['orientation'] = FlickrDf['rotation'].apply(rotation_to_orientation)
FlickrDf.drop(['rotation'], 1, inplace=True)
FlickrDf.reset_index(inplace=True)

expt2Df = pd.DataFrame.from_csv(ori_train_fl)
expt2Df.reset_index(inplace=True)

# Cast the join keys to strings so the merges in the next cell line up
FlickrDf['index'] = FlickrDf['index'].astype(str)
df_flickr_new['index'] = df_flickr_new['index'].astype(str)

expt2Df['index'] = expt2Df['index'].astype(str)
df_expt2_new['index'] = df_expt2_new['index'].astype(str)
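
The cell above relies on a rotation_to_orientation helper defined elsewhere in the notebook. A minimal sketch of what such a mapping might look like, under the purely illustrative assumption that rotation is an EXIF-style angle in degrees and orientation is a binary flag:

# Hypothetical sketch only; the real helper may use a different convention.
def rotation_to_orientation(rotation):
    # treat 0/180-degree rotations as orientation 1, everything else as 0
    return 1 if rotation in (0, 180) else 0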

In [132]:
# Merge the new zebra/numAnimals features into the original feature files and overwrite them
new_flickr_ftrs = pd.merge(FlickrDf, df_flickr_new)
new_expt2_ftrs = pd.merge(expt2Df, df_expt2_new)

new_flickr_ftrs.index = new_flickr_ftrs['index']
new_expt2_ftrs.index = new_expt2_ftrs['index']

new_flickr_ftrs.drop(['index'], 1, inplace=True)
new_expt2_ftrs.drop(['index'], 1, inplace=True)

new_flickr_ftrs.to_csv(ori_test_fl)
new_expt2_ftrs.to_csv(ori_train_fl)

In [7]:
import pandas as pd

In [3]:
full_gid_species_map = ImgMap.extractImageFeaturesFromMap("../data/full_gid_aid_map.json",
                                  "../data/full_aid_features.json",
                                  "SPECIES")

In [5]:
gid_newFtrs = {}
# Same derivation for the full GZC image set
for gid in full_gid_species_map.keys():
    hasZebra = 1 if any("zebra" in s for s in full_gid_species_map[gid]) else 0
    numAnimals = len(full_gid_species_map[gid]) if hasZebra == 1 else 0

    gid_newFtrs[gid] = dict(zebra=hasZebra, numAnimals=numAnimals)

In [17]:
fullFile = "../data/GZC_exifs_beauty_full.csv"
df_new = pd.DataFrame.from_dict(gid_newFtrs).transpose()
df_new.reset_index(inplace=True)
fullDf = pd.DataFrame.from_csv(fullFile)
fullDf.reset_index(inplace=True)

fullDf['index'] = fullDf['index'].astype(str)
df_new['index'] = df_new['index'].astype(str)

In [19]:
new_ftrs = pd.merge(fullDf, df_new)

new_ftrs.index = new_ftrs['index']

new_ftrs.drop(['index'], 1, inplace=True)
new_ftrs.to_csv(fullFile)

In [16]:
fullDf


Out[16]:
index arousal contrast dominance height hsv_itten_std_h hsv_itten_std_s hsv_itten_std_v lat long orientation pleasure symmetry width day hour
0 1 0.041161 2.448832 0.426170 4000 145132.844465 184809.877183 225482.969773 -1.351397 36.800210 1 0.366162 6.150837 6000 1 14
1 10 -0.067724 1.473800 0.500873 4000 105718.831503 140170.630774 133192.928522 -1.367088 36.781978 1 0.441554 11.425084 6000 1 15
2 100 0.010768 2.271823 0.445344 4000 102121.836430 139803.697872 189860.392711 -1.373255 36.800266 1 0.385749 8.839750 6000 1 16
3 1000 0.026792 2.298800 0.446958 3072 100418.690900 106270.090353 146867.861046 -1.349683 36.806025 1 0.385603 10.867455 4608 1 17
4 1001 0.013147 2.276355 0.445515 3072 107164.652028 112806.821143 144619.467648 -1.349680 36.806043 1 0.385667 10.942453 4608 1 17
5 1002 -0.016308 1.480487 0.546340 3072 110381.343514 152175.927300 154949.896217 -1.349651 36.806162 1 0.476080 11.348945 4608 1 17
6 1003 0.003803 1.610780 0.534058 3072 116605.125589 145911.415673 182644.571576 -1.349651 36.806162 1 0.463471 9.666165 4608 1 17
7 1004 0.005215 1.831523 0.550792 3072 109376.406163 130763.593118 157292.049615 -1.349651 36.806162 1 0.477868 10.961189 4608 1 17
8 1005 -0.049412 1.756933 0.557020 3072 102427.151569 140822.729057 132426.326397 -1.349651 36.806162 1 0.488551 12.629949 4608 1 17
9 1006 -0.101063 1.486260 0.491003 3072 76223.956320 192848.705592 109897.860632 -1.349996 36.805046 1 0.436200 14.113923 4608 1 17
10 1007 -0.068727 1.532040 0.486374 3072 79546.245085 163686.595718 84212.296570 -1.350004 36.805018 1 0.429059 13.097338 4608 1 17
11 1008 -0.023302 1.771302 0.574999 3072 102686.787801 113547.663104 115079.319316 -1.350125 36.804616 1 0.501646 11.760457 4608 1 17
12 1009 -0.052296 1.655842 0.597926 3072 101053.033611 128552.681831 126983.062538 -1.350135 36.804588 1 0.524357 12.952084 4608 1 17
13 101 0.055012 2.423743 0.444083 4000 142994.891693 193382.957863 250415.909007 -1.373255 36.800266 1 0.380382 6.215719 6000 1 16
14 1010 -0.073070 1.544590 0.620278 3072 103220.792906 130473.678198 139547.518101 -1.350150 36.804540 1 0.545775 12.227245 4608 1 17
15 1011 -0.054090 1.657800 0.590832 3072 102089.908049 134880.178286 120445.321565 -1.350150 36.804540 1 0.518369 12.004850 4608 1 17
16 1012 0.037852 1.654323 0.534467 3072 115421.171454 91823.205636 137036.836170 -1.347879 36.811986 1 0.460539 9.191188 4608 1 17
17 1013 0.130436 2.080397 0.528344 3072 130565.176473 101526.512977 198472.215772 -1.347879 36.811986 1 0.446283 5.251659 4608 1 17
18 1014 0.102107 2.102323 0.522868 3072 121935.899229 79761.924036 173877.363645 -1.347879 36.811986 1 0.444262 6.238882 4608 1 17
19 1015 -0.080634 1.083339 0.711285 3072 109385.440223 100301.475000 183899.108789 -1.347879 36.811986 1 0.625547 6.993671 4608 1 17
20 1016 -0.189806 0.658856 0.772935 3072 94326.473111 147757.444367 268945.890675 -1.347879 36.811986 1 0.689630 8.083892 4608 1 17
21 1017 -0.025195 1.435390 0.573592 3072 105762.484677 106917.091848 127323.422041 -1.347879 36.811986 1 0.500606 11.388584 4608 1 17
22 1018 -0.131434 1.298583 0.624997 3072 120040.524355 141225.970355 152604.489362 -1.377765 36.820685 1 0.555508 14.108658 4608 1 17
23 1019 -0.142309 1.187632 0.620119 3072 117702.605830 155874.015198 153026.402943 -1.377782 36.820582 1 0.552322 12.724450 4608 1 17
24 102 0.052833 2.365927 0.446404 4000 143097.313281 194735.896023 253024.183725 -1.373255 36.800266 1 0.382608 6.274329 6000 1 16
25 1020 -0.080860 1.430205 0.607510 3072 109461.354061 115976.520093 142665.729490 -1.378469 36.815023 1 0.535438 13.178411 4608 1 17
26 1021 -0.063634 1.535299 0.594533 3072 109591.932365 109570.631094 139876.250997 -1.378469 36.815023 1 0.522505 12.795531 4608 1 17
27 1022 -0.024324 1.544061 0.565713 3072 124468.770849 111143.615460 188321.086489 -1.378469 36.815023 1 0.493679 10.828531 4608 1 17
28 1023 -0.011146 1.652126 0.548265 3072 130266.572866 109601.529082 184731.104359 -1.378469 36.815023 1 0.477253 10.567642 4608 1 17
29 1024 -0.172662 0.840334 0.604609 3072 80929.567720 131676.525357 111024.079041 -1.378441 36.814929 1 0.541781 12.242563 4608 1 17
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9376 972 -0.118911 1.365083 0.606988 3000 114479.362738 162024.090003 140100.571422 -1.374407 36.800820 1 0.538659 10.719679 4000 1 18
9377 973 -0.136123 1.293617 0.554272 3000 100767.581086 148173.378135 59297.230565 -1.374381 36.800709 1 0.494536 8.303134 4000 1 18
9378 974 -0.139341 1.283878 0.518287 3000 85213.310268 174020.132322 16240.616491 -1.374309 36.800461 1 0.463593 7.847118 4000 1 18
9379 975 -0.122768 1.316078 0.513557 3000 95474.821818 147814.982227 74609.389682 -1.374273 36.800370 1 0.457884 6.868294 4000 1 18
9380 976 -0.130240 1.250508 0.578310 3000 98505.327203 168753.715297 76190.945661 -1.374267 36.800294 1 0.514845 9.302102 4000 1 18
9381 977 -0.134059 1.247510 0.577171 3000 92914.502547 154532.444657 95753.905344 -1.374267 36.800294 1 0.514224 8.220846 4000 1 18
9382 978 -0.040263 1.778379 0.503683 3000 115747.526303 185033.126742 165487.724199 -1.374267 36.800294 1 0.441344 6.952493 4000 1 18
9383 979 0.004641 1.941773 0.430354 3000 88503.948297 122797.207478 114425.373401 -1.374267 36.800294 1 0.373321 9.515406 4000 1 18
9384 98 -0.025492 2.000316 0.487619 4000 96507.917281 136893.743562 87648.843386 -1.373266 36.800295 1 0.425966 12.618132 6000 1 16
9385 980 -0.073141 1.608280 0.534560 3000 108486.527485 173310.067979 134775.813654 -1.374267 36.800294 1 0.471335 9.352862 4000 1 18
9386 981 0.016869 1.205074 0.470590 3000 134488.286728 109201.937263 118948.635220 -1.367125 36.781904 1 0.407086 7.367554 4000 1 18
9387 982 -0.169146 0.846736 0.566952 3000 131819.921141 218533.586006 254075.646296 -1.367125 36.781904 1 0.508736 13.299536 4000 1 18
9388 983 -0.002282 1.119010 0.434118 3000 133754.205045 126749.104319 35757.459753 -1.367120 36.782009 1 0.377258 9.844581 4000 1 18
9389 984 0.016984 1.239474 0.594435 3000 83156.326927 209791.610183 106335.693597 -1.359969 36.794605 1 0.514637 5.959576 4000 1 18
9390 985 -0.036597 1.192913 0.577097 3000 154734.373449 196008.382453 75911.263279 -1.359969 36.794605 1 0.504751 5.123990 4000 1 18
9391 986 0.284615 5.864828 0.349526 3072 106430.199789 50041.492668 251347.670004 -1.341132 36.792667 1 0.276092 15.061294 4608 1 16
9392 987 -0.217877 0.915207 0.775500 3072 117607.546593 147156.516891 303758.979721 -1.350339 36.803776 1 0.694568 8.450294 4608 1 17
9393 988 -0.189912 1.062235 0.774137 3072 121497.534295 145864.659554 302628.140715 -1.350339 36.803776 1 0.690685 8.150732 4608 1 17
9394 989 -0.209412 0.920698 0.779429 3072 118529.934274 140871.326739 306214.821708 -1.350339 36.803776 1 0.697163 8.132513 4608 1 17
9395 99 0.024696 2.400469 0.429178 4000 103386.342253 135647.979469 181192.219341 -1.373255 36.800266 1 0.370364 8.442059 6000 1 16
9396 990 -0.201844 0.890001 0.780554 3072 119563.308965 129302.487939 304329.172970 -1.350339 36.803776 1 0.697410 8.262622 4608 1 17
9397 991 -0.234761 0.810681 0.779969 3072 114177.166723 148342.237446 309997.654976 -1.350339 36.803776 1 0.700080 8.417284 4608 1 17
9398 992 -0.233003 0.878249 0.775818 3072 118262.205432 170494.439656 307404.929544 -1.350339 36.803776 1 0.696304 8.612853 4608 1 17
9399 993 -0.271714 0.817095 0.764532 3072 121223.182026 198822.606277 309835.254461 -1.350315 36.803965 1 0.690240 8.151001 4608 1 17
9400 994 -0.202593 0.986989 0.776921 3072 120710.329674 152312.312126 299603.737954 -1.350316 36.803978 1 0.694327 8.538410 4608 1 17
9401 995 -0.024251 1.903426 0.492239 3072 109705.458079 127920.634271 221190.909187 -1.350316 36.803978 1 0.429859 13.758079 4608 1 17
9402 996 -0.023748 1.990896 0.489581 3072 111592.776304 129730.929302 233356.018736 -1.350316 36.803978 1 0.427502 14.018937 4608 1 17
9403 997 -0.003467 1.862969 0.529420 3072 116033.227874 107465.097811 166095.163085 -1.350316 36.803978 1 0.460144 11.381943 4608 1 17
9404 998 -0.016920 1.815679 0.534879 3072 112711.900665 113012.973331 166844.660361 -1.350316 36.803978 1 0.466184 12.162401 4608 1 17
9405 999 0.021029 1.718418 0.511706 3072 111754.321311 124494.219761 171428.208784 -1.349706 36.805854 1 0.442395 12.175421 4608 1 17

9406 rows × 16 columns


In [ ]: