In [8]:
from plotly.offline import plot, iplot
import cufflinks as cf, plotly.plotly as py, json, pandas as pd, numpy as np
import ClassifierCapsuleClass as ClfClass, ClassiferHelperAPI as CH
import RegressionCapsuleClass as RgrClass
import plotly.graph_objs as go
from collections import Counter
cf.go_offline()
import folium
from folium.plugins import MarkerCluster
from folium import plugins
import importlib
importlib.reload(CH)


Out[8]:
<module 'ClassiferHelperAPI' from '/Users/sreejithmenon/Google Drive/CodeBase/AWESOME/script/ClassiferHelperAPI.py'>

In [17]:
# Load the Flickr EXIF dump; the JSON object is turned into a frame and
# transposed, so each top-level JSON key becomes one row.
with open("../data/Flickr_EXIF_full.json", "r") as exif_fl:
    exif_d = json.load(exif_fl)

df = pd.DataFrame(exif_d).T

# Keep the full timestamp in 'datetime' and reduce 'date' to the calendar day
# (re-parsed as datetime64 so the .dt accessor is available below).
df['datetime'] = pd.to_datetime(df['date'])
df['date'] = pd.to_datetime(df['datetime'].dt.date)

# Derived calendar columns used by the distribution plots.
day = df['date']
df['year_month'] = day.dt.strftime("%m-%Y")
df['year'] = day.dt.strftime("%Y")
df['month'] = day.dt.strftime("%m")
# NOTE(review): Series.dt.week is deprecated in newer pandas releases
# (dt.isocalendar().week is the replacement) - revisit when upgrading.
df['week'] = day.dt.week

# 'year' is a string column, so this is a lexicographic comparison; it keeps
# only rows whose year string sorts after '1970'.
df = df[df['year'] > '1970'].sort_values(by='date')

Month-wise distribution over the years


In [18]:
# Histogram of records per "MM-YYYY" label; title and axis labels are set so
# the figure can be read without the surrounding notebook text.
df['year_month'].iplot(
    kind='histogram',
    title='Month-wise distribution over the years',
    xTitle='Month-Year',
    yTitle='Number of images',
)


Annual distribution of images


In [19]:
# Histogram of records per year; title and axis labels are set so the figure
# can be read without the surrounding notebook text.
df['year'].iplot(
    kind='histogram',
    title='Annual distribution of images',
    xTitle='Year',
    yTitle='Number of images',
)


General trend - month-wise distribution


In [20]:
# Histogram of records per calendar month (all years pooled); title and axis
# labels are set so the figure can be read without the surrounding text.
df['month'].iplot(
    kind='histogram',
    title='General trend - month-wise distribution',
    xTitle='Month',
    yTitle='Number of images',
)



In [8]:
# Keep only the coordinate columns of rows where neither latitude nor
# longitude is zero.
lat_is_set = df['lat'] != 0
long_is_set = df['long'] != 0
df_non_zero = df.loc[lat_is_set & long_is_set, ['lat', 'long']]

map_loc = folium.Map(
    location=[38.64264252590279, -51.622090714285676],
    tiles='Stamen Terrain',
    zoom_start=2,
)

# Each record is (index, lat, long): the frame index becomes the popup text
# and the two coordinates place the marker.
recds = df_non_zero.to_records()
for recd in recds:
    marker = folium.Marker(
        [recd[1], recd[2]],
        icon=folium.Icon(color='green', icon='info-sign'),
        popup=recd[0],
    )
    marker.add_to(map_loc)

map_loc.save(outfile='../FinalResults/FlickrLocations.html')

In [9]:
# Heat map of the same (lat, long) pairs that were plotted as markers; `recds`
# comes from the marker-map cell.
locs = [(recd[1], recd[2]) for recd in recds]
heatmap_map = folium.Map(
    location=[38.64264252590279, -51.622090714285676],
    tiles='Stamen Terrain',
    zoom_start=2,
)
hm = plugins.HeatMap(locs)
# add_to() is the attachment call already used for the markers; the older
# add_children() spelling is deprecated in folium.
hm.add_to(heatmap_map)

heatmap_map.save("../FinalResults/heatMap_Flickr.html")

Weekly distribution of Flickr Images


In [21]:
# Number of records per (year, week) pair.
df_new = df.groupby(['year', 'week'])['date'].count()
df_dict = df_new.to_dict()

# Sort on the (year, week) keys themselves rather than on the rendered label:
# sorting the "YYYY wk#N" strings would put wk#10 before wk#2.
df_tups = [(' wk#'.join(map(str, key)), df_dict[key]) for key in sorted(df_dict)]

# Labels drop the first two characters of the year: "2016 wk#5" -> "'16 wk#5".
x = ["'" + label[2:] for label, _ in df_tups]
y = [count for _, count in df_tups]
trace1 = go.Bar(
    x=x,
    y=y
)

data = [trace1]
layout = go.Layout(
    title='Weekly distribution of Flickr images',
    xaxis=dict(title='Year / week', tickangle=45),
    yaxis=dict(title='Number of images')
)
fig = dict(data=data, layout=layout)
py.iplot(fig)


Out[21]:

In [158]:
# df_train is only created by the classifier-comparison loop in a later cell,
# so on a fresh kernel this cell would fail with NameError. Guard the call and
# tell the reader what to run first instead of raising.
if 'df_train' in globals():
    df_train.iplot(
        kind='histogram',
        histnorm='probability',
        title='Predicted probability - training data',
        xTitle='Predicted probability',
        yTitle='P(X)',
    )
else:
    print("df_train is not defined yet - run the classifier comparison cell first.")



In [13]:
# Per-classifier argument dictionaries, keyed by the method names used in the
# loop below; the whole mapping is handed to CH.trainTestClf.
clfArgs = {'dummy' : {'strategy' : 'most_frequent'},
            'bayesian' : {'fit_prior' : True},
            'logistic' : {'penalty' : 'l2'},
            'svm' : {'kernel' : 'rbf','probability' : True},
            'dtree' : {'criterion' : 'entropy'},
            'random_forests' : {'n_estimators' : 10 },
            'ada_boost' : {'n_estimators' : 50 }}

# NOTE(review): regrArgs is not used anywhere in this cell. If these entries
# are forwarded as estimator keyword arguments, 'svr' and 'dtree_regressor'
# may not accept 'fit_intercept' - TODO confirm against RegressionCapsuleClass.
regrArgs = {'linear' : {'fit_intercept' : True},
            'ridge' : {'fit_intercept' : True},
            'lasso' : {'fit_intercept' : True},
            'elastic_net' : {'fit_intercept' : True},
            'svr' : {'fit_intercept' : True},
            'dtree_regressor' : {'fit_intercept' : True}}


def _results_to_frame(results):
    """Build a one-column 'Probability' frame indexed by 'GID'.

    `results` is the mapping returned by CH.trainTestClf; its keys become the
    index and its values the 'Probability' column.
    """
    frame = pd.DataFrame(list(results.items()), columns=['GID', "Probability"])
    # set_index replaces the old "copy column to index, then drop(..., 1)"
    # sequence, which relied on a positional axis argument.
    return frame.set_index('GID')


# The paths do not change between iterations, so they are set once.
train_data_fl = "/tmp/training_fl.csv"
test_data_fl = "/tmp/testing_fl.csv"

for rgrMeth in ['dummy', 'bayesian', 'logistic', 'svm', 'dtree', 'random_forests', 'ada_boost']:
    # First call: the training file is passed as both train and test input,
    # giving the predicted probabilities on the training data itself.
    obj, results = CH.trainTestClf(train_data_fl, train_data_fl, rgrMeth, 'beauty', None, clfArgs)
    df_train = _results_to_frame(results)

    # Second call: same training file, predictions for the testing (Flickr) file.
    # NOTE(review): this presumably refits the model; for randomized methods
    # the two histograms may come from different fits - TODO confirm.
    obj, results = CH.trainTestClf(train_data_fl, test_data_fl, rgrMeth, 'beauty', None, clfArgs)
    df_test = _results_to_frame(results)

    trace1 = go.Histogram(
        x=df_train['Probability'],
        opacity=0.75,
        histnorm='probability',
        name='Pred. probability - Training',
        marker=dict(
            color='grey')
    )
    trace2 = go.Histogram(
        x=df_test['Probability'],
        opacity=0.75,
        histnorm='probability',
        name='Pred. probability - Flickr',
        marker=dict(
            color='blue')
    )

    data = [trace1, trace2]

    layout = go.Layout(
        title='PDF %s' %rgrMeth,
        xaxis=dict(
            title='Share rate'
        ),
        yaxis=dict(
            title='P(X)'
        ),
        barmode='overlay'
    )

    # Overlay both normalized histograms and print the embed snippet of the
    # uploaded figure.
    fig = go.Figure(data=data, layout=layout)
    f = py.iplot(fig)
    print(f.embed_code)


<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/334.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/336.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/338.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/340.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/342.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/344.embed" height="525px" width="100%"></iframe>
<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~sreefall15/346.embed" height="525px" width="100%"></iframe>

In [132]:


In [ ]: