In [8]:
# Plotting stack. Note that two different `iplot` entry points are in scope:
# the offline one imported by name here, and the `plotly.plotly` client bound
# to `py` on the next line (used later as `py.iplot`).
from plotly.offline import plot, iplot
import cufflinks as cf, plotly.plotly as py, json, pandas as pd, numpy as np
# Classifier / regression helpers — presumably project-local modules that must
# be importable from the notebook's working directory (TODO confirm).
import ClassifierCapsuleClass as ClfClass, ClassiferHelperAPI as CH
import RegressionCapsuleClass as RgrClass
import plotly.graph_objs as go
from collections import Counter
# Put cufflinks in offline mode so the DataFrame/Series `.iplot()` calls used
# below render inside the notebook.
cf.go_offline()
import folium
from folium.plugins import MarkerCluster
from folium import plugins
import importlib
# Re-execute the helper module so that edits made to it on disk are picked up
# without restarting the kernel.
importlib.reload(CH)
Out[8]:
In [17]:
# Load the Flickr EXIF dump and derive the calendar columns used by the plots
# in the following cells.
with open("../data/Flickr_EXIF_full.json", "r") as exif_fl:
    exif_d = json.load(exif_fl)

# from_dict() makes the top-level JSON keys columns; transposing turns them
# into the row index, one row per top-level key.
df = pd.DataFrame.from_dict(exif_d).transpose()

# Keep the full timestamp in 'datetime' and overwrite 'date' with the same
# value truncated to the day (as a datetime64 column, so `.dt` still works).
df['datetime'] = pd.to_datetime(df['date'])
df['date'] = df['datetime'].dt.date
df['date'] = pd.to_datetime(df.date)

# String-typed calendar buckets. Note 'year_month' is formatted month-first.
df['year_month'] = df.date.dt.strftime("%m-%Y")
df['year'] = df.date.dt.strftime("%Y")
df['month'] = df.date.dt.strftime("%m")

# `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0; the
# replacement is `dt.isocalendar().week`. Fall back to the old accessor only
# on pandas versions that predate `isocalendar`.
if hasattr(df.date.dt, 'isocalendar'):
    df['week'] = df.date.dt.isocalendar().week
else:
    df['week'] = df.date.dt.week

# Drop rows dated 1970 or earlier (string comparison on the 4-digit year) and
# order chronologically. Filtering and sorting in one expression avoids calling
# an in-place sort on a filtered slice, which can raise SettingWithCopyWarning.
df = df[df['year'] > '1970'].sort_values(by='date')
In [18]:
# Histogram of record counts per "%m-%Y" bucket.
df.loc[:, 'year_month'].iplot(kind='histogram')
In [19]:
# Histogram of record counts per calendar year.
df.loc[:, 'year'].iplot(kind='histogram')
In [20]:
# Histogram of record counts per month-of-year (all years pooled).
df.loc[:, 'month'].iplot(kind='histogram')
In [8]:
# Keep only rows whose latitude AND longitude are both non-zero, and only the
# two coordinate columns.
has_coords = (df['lat'] != 0) & (df['long'] != 0)
df_non_zero = df.loc[has_coords, ['lat', 'long']]

map_loc = folium.Map(
    location=[38.64264252590279, -51.622090714285676],
    tiles='Stamen Terrain',
    zoom_start=2,
)

# to_records() yields (index, lat, long) records; `recds` is reused by the
# heat-map cell that follows.
recds = df_non_zero.to_records()
for recd in recds:
    row_label, lat, lon = recd[0], recd[1], recd[2]
    marker = folium.Marker(
        [lat, lon],
        icon=folium.Icon(color='green', icon='info-sign'),
        popup=row_label,  # the row's index value is shown when the pin is clicked
    )
    marker.add_to(map_loc)

map_loc.save(outfile='../FinalResults/FlickrLocations.html')
In [9]:
# (lat, long) pairs taken from fields 1 and 2 of each record in `recds`
# (built by the marker-map cell from the ['lat', 'long'] columns).
locs = [(recd[1], recd[2]) for recd in recds]

heatmap_map = folium.Map(
    location=[38.64264252590279, -51.622090714285676],
    tiles='Stamen Terrain',
    zoom_start=2,
)

hm = plugins.HeatMap(locs)
# Attach the layer with `add_to`, the same call the marker map uses.
# `add_children` is a deprecated alias of `add_child` and is not reliably
# available across folium/branca releases.
hm.add_to(heatmap_map)

heatmap_map.save("../FinalResults/heatMap_Flickr.html")
In [21]:
# Number of rows per (year, week) bucket; count()['date'] counts the non-null
# 'date' values in each group.
# NOTE(review): 'year' is the calendar year while 'week' looks like an ISO week
# number, so days around New Year can land in buckets such as (2016, 53) —
# confirm whether that matters for this chart.
df_new = df.groupby(['year', 'week']).count()['date']
df_dict = df_new.to_dict()

# Order the buckets numerically by (year, week) BEFORE turning them into
# labels. Sorting the formatted strings instead puts "wk#10" ahead of "wk#2",
# which scrambles the chronological order of the bars.
ordered_keys = sorted(df_dict, key=lambda key: (int(key[0]), int(key[1])))
df_tups = [(' wk#'.join(map(str, key)), df_dict[key]) for key in ordered_keys]

# Shorten the label by dropping the century: "2015 wk#7" -> "'15 wk#7".
x = ["'" + tup[0][2:] for tup in df_tups]
y = [tup[1] for tup in df_tups]

trace1 = go.Bar(
    x=x,
    y=y
)
data = [trace1]
layout = go.Layout(
    xaxis=dict(tickangle=45)
)
fig = dict(data=data, layout=layout)
py.iplot(fig)
Out[21]:
In [158]:
# Normalised histogram of the predicted probabilities in `df_train`.
# NOTE: `df_train` is only assigned inside the classifier loop in a later cell,
# so on a fresh kernel this cell fails unless that cell has been run first; it
# then shows the frame left over from the last method in that loop.
df_train.iplot(
    kind='histogram',
    histnorm='probability',
)
In [13]:
# Keyword arguments per classifier method name; the whole mapping is handed to
# CH.trainTestClf by the loop below.
clfArgs = {
    'dummy': dict(strategy='most_frequent'),
    'bayesian': dict(fit_prior=True),
    'logistic': dict(penalty='l2'),
    'svm': dict(kernel='rbf', probability=True),
    'dtree': dict(criterion='entropy'),
    'random_forests': dict(n_estimators=10),
    'ada_boost': dict(n_estimators=50),
}

# Keyword arguments per regression method name. Every method gets the same
# single option.
# NOTE(review): not referenced by the code visible in this cell; also confirm
# that the 'svr' and 'dtree_regressor' back-ends accept `fit_intercept`.
regrArgs = {
    'linear': dict(fit_intercept=True),
    'ridge': dict(fit_intercept=True),
    'lasso': dict(fit_intercept=True),
    'elastic_net': dict(fit_intercept=True),
    'svr': dict(fit_intercept=True),
    'dtree_regressor': dict(fit_intercept=True),
}
def _probability_frame(results):
    """Build a one-column 'Probability' frame indexed by 'GID'.

    `results` must expose `.items()` yielding (GID, probability) pairs.
    """
    frame = pd.DataFrame(list(results.items()), columns=['GID', "Probability"])
    # set_index moves GID from the columns to the index in one step. It replaces
    # `frame.index = frame.GID; frame.drop(['GID'], 1, inplace=True)`, whose
    # positional `axis` argument is rejected by pandas >= 2.0.
    return frame.set_index('GID')


# The training file is the same for every run; only the scored file changes.
train_data_fl = "/tmp/training_fl.csv"

# For each classifier method, compare the predicted-probability distribution on
# the training file with the one on the testing file.
# NOTE(review): CH.trainTestClf is opaque from here — presumably it fits on the
# first file and returns (model, {GID: probability}) for the second; confirm.
for rgrMeth in ['dummy', 'bayesian', 'logistic', 'svm', 'dtree', 'random_forests', 'ada_boost']:
    # Pass 1: score the training file itself.
    test_data_fl = "/tmp/training_fl.csv"
    obj, results = CH.trainTestClf(train_data_fl, test_data_fl, rgrMeth, 'beauty', None, clfArgs)
    df_train = _probability_frame(results)

    # Pass 2: score the testing file.
    test_data_fl = "/tmp/testing_fl.csv"
    obj, results = CH.trainTestClf(train_data_fl, test_data_fl, rgrMeth, 'beauty', None, clfArgs)
    df_test = _probability_frame(results)

    trace1 = go.Histogram(
        x=df_train['Probability'],
        opacity=0.75,
        histnorm='probability',
        name='Pred. probability - Training',
        marker=dict(
            color='grey')
    )
    trace2 = go.Histogram(
        x=df_test['Probability'],
        opacity=0.75,
        histnorm='probability',
        name='Pred. probability - Flickr',
        marker=dict(
            color='blue')
    )
    data = [trace1, trace2]
    layout = go.Layout(
        title='PDF %s' % rgrMeth,
        xaxis=dict(
            title='Share rate'
        ),
        yaxis=dict(
            title='P(X)'
        ),
        # Draw both histograms on the same bars' positions; the 0.75 opacity
        # above keeps the one underneath visible.
        barmode='overlay'
    )
    fig = go.Figure(data=data, layout=layout)
    # py is the plotly.plotly client; its return value carries the embed
    # snippet printed below.
    f = py.iplot(fig)
    print(f.embed_code)
In [132]:
In [ ]: