In [1]:
# Imports and one-time setup for the whole notebook.
import importlib
import ClassiferHelperAPI

# Reload the helper module BEFORE binding names out of it. `importlib.reload`
# re-executes the module in place, but names obtained earlier through
# `from ClassiferHelperAPI import ...` keep pointing at the pre-reload objects,
# so importing first and reloading afterwards would leave stale functions bound.
importlib.reload(ClassiferHelperAPI)
from ClassiferHelperAPI import trainTestClf, trainTestRgrs

import numpy as np
import json
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from plotly import tools
import plotly.plotly as py
import cufflinks as cf

cf.go_online()
In [134]:
# Per-method keyword arguments, keyed by method name. The whole dictionary is
# handed to trainTestClf as `methArgs`.
clfArgs = {
    'dummy': {'strategy': 'most_frequent'},
    'bayesian': {'fit_prior': True},
    'logistic': {'penalty': 'l2'},
    'svm': {'kernel': 'rbf', 'probability': True},
    'dtree': {'criterion': 'entropy'},
    'random_forests': {'n_estimators': 10},
    'ada_boost': {'n_estimators': 50},
}

# Same idea for the regressors; handed to trainTestRgrs as `methArgs`.
# NOTE(review): 'svr' and 'dtree_regressor' receive 'fit_intercept'. If these
# map onto scikit-learn's SVR / DecisionTreeRegressor, neither accepts that
# parameter -- TODO confirm how ClassiferHelperAPI consumes these entries.
regrArgs = {
    'linear': {'fit_intercept': True},
    'ridge': {'fit_intercept': True},
    'lasso': {'fit_intercept': True},
    'elastic_net': {'fit_intercept': True},
    'svr': {'fit_intercept': True},
    'dtree_regressor': {'fit_intercept': True},
}

ori_train_fl = "../data/BeautyFtrVector_GZC_Expt2.csv"
ori_test_fl = "../data/Flickr_Scrapes_Ftrs.csv"

# `DataFrame.from_csv` is deprecated and absent from current pandas;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults.
train_df = pd.read_csv(ori_train_fl, index_col=0, parse_dates=True)
test_df = pd.read_csv(ori_test_fl, index_col=0, parse_dates=True)

# Drop the location/day columns from both sets before training. The axis is
# passed by keyword because newer pandas rejects it as a positional argument.
train_df = train_df.drop(['lat', 'long', 'day'], axis=1)
test_df = test_df.drop(['lat', 'long', 'day'], axis=1)

# The reduced frames are written out because the train/test helpers are called
# with file paths rather than DataFrames.
# NOTE(review): fixed /tmp paths are shared by every run on this machine.
train_fl = "/tmp/training_fl.csv"
test_fl = "/tmp/test_fl.csv"
train_df.to_csv(train_fl)
test_df.to_csv(test_fl)
In [135]:
# Read the JSON stored in Flickr_FL_URL_map.json into `fl_url_map`.
with open("../data/Flickr_FL_URL_map.json", "r") as fl_url_map_fl:
    fl_url_map = json.loads(fl_url_map_fl.read())

# Method names iterated by the regression and classification cells.
rgrTypes = [
    'linear',
    'ridge',
    'lasso',
    'elastic_net',
    'svr',
    'dtree_regressor',
]
clfTypes = [
    'bayesian',
    'logistic',
    'svm',
    'dtree',
    'random_forests',
    'ada_boost',
]

# Name passed as the target attribute to both train/test helpers.
attrib = 'beauty'
In [140]:
# Pass 1: run every regressor and remember the returned (object, predictions)
# pair under the method name.
results = {}
for meth in rgrTypes:
    methObj, predResults = trainTestRgrs(
        train_fl, test_fl, meth, attrib,
        infoGainFl=None,
        methArgs=regrArgs,
    )
    results[meth] = {'obj': methObj, 'pred_results': predResults}

# Pass 2: one marker scatter per regressor, predicted value against the
# 1-based position of the image in the prediction mapping.
for meth in rgrTypes:
    pred_results = results[meth]['pred_results']
    y = [value for value in pred_results.values()]
    x = [position + 1 for position in range(len(y))]

    layout = go.Layout(
        title="Share rate distributions using %s" % meth,
        showlegend=False,
        xaxis={
            'title': 'Images (n)',
            'ticklen': 5,
            'zeroline': True,
            'gridwidth': 2,
        },
        yaxis={
            'title': 'Predicted Share rates',
            'ticklen': 5,
            'gridwidth': 2,
        },
    )
    trace1 = go.Scatter(x=x, y=y, mode='markers')
    data = [trace1]
    fig = {'data': data, 'layout': layout}

    # py.iplot's return value is not rendered from inside a loop, so the embed
    # code is printed explicitly for each figure.
    figmain = py.iplot(
        fig,
        filename='Visual for distribution of predicted share rates using %s' % meth,
    )
    print(figmain.embed_code)
In [141]:
# Train every classifier in `clfTypes` and scatter-plot its predicted
# probabilities.
#
# `results` is extended instead of being rebuilt from an empty dict: the
# training-distribution cell further down reads results['linear'], which the
# regression cell stores. Resetting the dict here made that lookup fail with a
# KeyError whenever the notebook was executed top to bottom. The classifier and
# regressor method names do not overlap, so the entries cannot clash.
try:
    results
except NameError:
    # Classification cell executed without the regression cell before it.
    results = {}

for meth in clfTypes:
    methObj, predResults = trainTestClf(train_fl,
                                        test_fl,
                                        meth,
                                        attrib,
                                        infoGainFl=None,
                                        methArgs=clfArgs)
    results[meth] = dict(obj=methObj, pred_results=predResults)

for meth in clfTypes:
    # The plotted values come from the `predProbabs` attribute of the returned
    # helper object, not from the returned prediction results.
    pred_results = results[meth]['obj'].predProbabs
    y = list(pred_results)
    x = list(range(1, len(y) + 1))  # 1-based image position

    layout = go.Layout(
        title="Prediction probability distributions using %s" % meth,
        showlegend=False,
        xaxis=dict(
            title='Images (n)',
            ticklen=5,
            zeroline=True,
            gridwidth=2
        ),
        yaxis=dict(
            title='Predicted Share/No-Share probabilities',
            ticklen=5,
            gridwidth=2
        )
    )
    trace1 = go.Scatter(
        x=x,
        y=y,
        mode='markers'
    )
    data = [trace1]
    fig = dict(data=data, layout=layout)

    # The iplot return value is not displayed from inside a loop; print the
    # embed code of each uploaded figure instead.
    figmain = py.iplot(fig, filename='Visual for distribution of predicted share no-share probabilities using %s' % meth)
    print(figmain.embed_code)
In [ ]:
# Scatter of the training targets held by the 'linear' entry of `results`
# (its `train_y` attribute), against the 1-based sample position.
pred_results = results['linear']['obj'].train_y
y = [value for value in pred_results]
x = [position + 1 for position in range(len(y))]

layout = go.Layout(
    title="Training data distribution",
    showlegend=False,
    xaxis={
        'title': 'Images (n)',
        'ticklen': 5,
        'zeroline': True,
        'gridwidth': 2,
    },
    yaxis={
        'title': 'Share proportion',
        'ticklen': 5,
        'gridwidth': 2,
    },
)
trace1 = go.Scatter(x=x, y=y, mode='markers')
data = [trace1]
fig = {'data': data, 'layout': layout}

# `fig` is rebound to the object returned by py.iplot; the cell's displayed
# value is that object's embed code.
fig = py.iplot(fig, filename="Expt2 Training data distributions")
fig.embed_code
In [1]:
import JobsMapResultsFilesToContainerObjs as ImgMap
import re
In [22]:
# Extract the "SPECIES" feature for both image collections. Each call takes a
# gid->aid map file, an aid->features file and the feature name.
flickr_species_inputs = (
    "../data/Flickr_IBEIS_Ftrs_gid_aid_map.json",
    "../data/Flickr_IBEIS_Ftrs_aid_features.json",
    "SPECIES",
)
Flickr_gid_species_map = ImgMap.extractImageFeaturesFromMap(*flickr_species_inputs)

expt2_species_inputs = (
    "../data/experiment2_gid_aid_map.json",
    "../data/experiment2_aid_features.json",
    "SPECIES",
)
Expt2_gid_species_map = ImgMap.extractImageFeaturesFromMap(*expt2_species_inputs)
In [32]:
# Load the gid -> file-name JSON map.
with open("../data/flickr_imgs_gid_flnm_map.json") as flID_map_fl_obj:
    gid_flID_map = json.loads(flID_map_fl_obj.read())

# Re-key the species map: each gid is replaced by the digit run captured in
# front of an underscore in that gid's file name (first regex match).
Flickr_imgID_species_map = {
    re.findall(r'([0-9]*)_.*', gid_flID_map[gid])[0]: species
    for gid, species in Flickr_gid_species_map.items()
}
In [34]:
# Derive two features per Flickr image:
#   zebra      -- 1 when any species string contains "zebra", else 0
#   numAnimals -- number of species entries, counted only when a zebra is present
flID_newFtrs = {}
for flID, species_list in Flickr_imgID_species_map.items():
    hasZebra = int(any("zebra" in s for s in species_list))
    numAnimals = len(species_list) if hasZebra else 0
    flID_newFtrs[flID] = {'zebra': hasZebra, 'numAnimals': numAnimals}
In [37]:
# Zebra flag and animal count for the experiment-2 images; the count stays 0
# for images in which no species string contains "zebra".
gid_newFtrs = {}
for flID, expt2_species in Expt2_gid_species_map.items():
    if any("zebra" in s for s in expt2_species):
        hasZebra = 1
        numAnimals = len(expt2_species)
    else:
        hasZebra = 0
        numAnimals = 0
    gid_newFtrs[flID] = {'zebra': hasZebra, 'numAnimals': numAnimals}
In [138]:
# Turn both feature dictionaries into frames with one row per image; the image
# identifier is moved out of the index into a regular column by reset_index().
df_expt2_new = pd.DataFrame.from_dict(gid_newFtrs).T.reset_index()
df_flickr_new = pd.DataFrame.from_dict(flID_newFtrs).T.reset_index()
In [131]:
# Reload the original feature CSVs and prepare them for joining with the new
# zebra/numAnimals features.
#
# `DataFrame.from_csv` is deprecated and absent from current pandas;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults.
FlickrDf = pd.read_csv(ori_test_fl, index_col=0, parse_dates=True)

# Replace the 'rotation' column by an 'orientation' column.
# NOTE(review): `rotation_to_orientation` is not defined or imported anywhere
# in the visible notebook -- it has to exist in the kernel before this runs.
FlickrDf['orientation'] = FlickrDf['rotation'].apply(rotation_to_orientation)
# Axis passed by keyword: newer pandas rejects it as a positional argument.
FlickrDf = FlickrDf.drop(['rotation'], axis=1)
FlickrDf.reset_index(inplace=True)

expt2Df = pd.read_csv(ori_train_fl, index_col=0, parse_dates=True)
expt2Df.reset_index(inplace=True)

# Cast the 'index' column to str in all four frames so the key column has the
# same dtype on both sides of the merges performed afterwards.
FlickrDf['index'] = FlickrDf['index'].astype(str)
df_flickr_new['index'] = df_flickr_new['index'].astype(str)
expt2Df['index'] = expt2Df['index'].astype(str)
df_expt2_new['index'] = df_expt2_new['index'].astype(str)
In [132]:
# Join the original features with the new ones. pd.merge is called without
# `on`/`how`, so it joins on every column name the two frames share and keeps
# only rows present in both.
# `set_index('index')` moves the 'index' column back into the row index and
# removes the column in one step; it replaces the index assignment followed by
# `drop(['index'], 1, ...)`, whose positional axis argument newer pandas rejects.
new_flickr_ftrs = pd.merge(FlickrDf, df_flickr_new).set_index('index')
new_expt2_ftrs = pd.merge(expt2Df, df_expt2_new).set_index('index')

# NOTE(review): this overwrites the very CSV files the frames were read from,
# so the source files are modified in place.
new_flickr_ftrs.to_csv(ori_test_fl)
new_expt2_ftrs.to_csv(ori_train_fl)
In [7]:
import pandas as pd
In [3]:
# "SPECIES" feature per image for the full data set, built from the full
# gid->aid map file and the full aid->features file.
full_species_inputs = (
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "SPECIES",
)
full_gid_species_map = ImgMap.extractImageFeaturesFromMap(*full_species_inputs)
In [5]:
# Zebra flag and animal count for every image of the FULL data set.
#
# The species are read from `full_gid_species_map`, the mapping being iterated.
# The previous version looked each key up in `Expt2_gid_species_map` instead
# (copy-paste from the experiment-2 cell), which raises KeyError for images
# outside experiment 2 and NameError when that map was never built.
gid_newFtrs = {}
for flID, species in full_gid_species_map.items():
    # 1 when any species string of the image contains "zebra", else 0.
    hasZebra = 1 if any("zebra" in s for s in species) else 0
    # Animals are counted only for images that contain a zebra.
    numAnimals = len(species) if hasZebra == 1 else 0
    gid_newFtrs[flID] = dict(zebra=hasZebra, numAnimals=numAnimals)
In [17]:
fullFile = "../data/GZC_exifs_beauty_full.csv"

# One row per image with the derived zebra/numAnimals features; the image
# identifier ends up in a regular 'index' column.
df_new = pd.DataFrame.from_dict(gid_newFtrs).transpose()
df_new.reset_index(inplace=True)

# `DataFrame.from_csv` is deprecated and absent from current pandas;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults.
fullDf = pd.read_csv(fullFile, index_col=0, parse_dates=True)
fullDf.reset_index(inplace=True)

# Same dtype for the key column in both frames before they are merged.
fullDf['index'] = fullDf['index'].astype(str)
df_new['index'] = df_new['index'].astype(str)
In [19]:
# Join the full feature table with the derived features. pd.merge is called
# without `on`/`how`, so it joins on the shared column names and keeps only
# rows present in both frames.
# `set_index('index')` restores 'index' as the row index and removes the
# column; it replaces the index assignment followed by `drop(['index'], 1, ...)`,
# whose positional axis argument newer pandas rejects.
new_ftrs = pd.merge(fullDf, df_new).set_index('index')

# NOTE(review): this overwrites the CSV that `fullDf` was read from.
new_ftrs.to_csv(fullFile)
In [16]:
# Preview only the first rows instead of rendering the whole frame.
fullDf.head()
Out[16]:
In [ ]: