notebook.community

Edit and run



In [ ]:

    
## Unsupervised Analysis of Days of the Week



In [1]:

    
%matplotlib inline
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# deprecated the bare 'seaborn' name, which newer releases no longer accept.
# Use the new name when this installation provides it, otherwise the old one.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

import pandas as pd
import numpy as np

from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture



In [2]:

    
##GET DATA



In [3]:

    
# Load the Fremont dataset through the project-local helper in packages.data.
# NOTE(review): later cells read a 'Total' column and use data.index.time /
# data.index.date, so this presumably returns a DataFrame with a datetime
# index — confirm against the helper.
from packages.data import get_fremont_data
data = get_fremont_data()



In [4]:

    
# Reshape the 'Total' series into a table with one row per time of day and one
# column per calendar date, so each column is a single day's profile.
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# Overlay every day's curve almost fully transparent so that only shapes
# shared by many days remain visible.
pivoted.plot(legend=False, alpha=0.01)









    Out[4]:





<matplotlib.axes._subplots.AxesSubplot at 0x115e46438>



In [5]:

    
##PRINCIPAL COMPONENT ANALYSIS



In [6]:

    
# Build the feature matrix: missing cells become 0, then the table is
# transposed so that each row is one day (a column of `pivoted`) and each
# column is one time-of-day bin.
days_by_time = pivoted.fillna(0).T
x = days_by_time.values
# Show the matrix dimensions as (number of days, number of time bins).
x.shape









    Out[6]:





(1732, 24)



In [7]:

    
# Project every day's profile onto its first two principal components so the
# days can be drawn in a plane. The 'full' solver is requested explicitly.
pca = PCA(n_components=2, svd_solver='full')
x2 = pca.fit_transform(x)
# Show the projected dimensions: one row per day, two columns.
x2.shape









    Out[7]:





(1732, 2)



In [8]:

    
# Draw each day as one point in the plane of the two PCA components.
pc1, pc2 = x2[:, 0], x2[:, 1]
plt.scatter(pc1, pc2)









    Out[8]:





<matplotlib.collections.PathCollection at 0x11ae00470>



In [9]:

    
##UNSUPERVISED CLUSTERING



In [10]:

    
# Fit a two-component Gaussian mixture on the full per-day profiles (x, not the
# 2-D projection x2) and assign every day to one component.
#
# random_state pins the otherwise random initialisation, so Restart & Run All
# reproduces the same fit and the same 0/1 numbering. Later cells hard-code
# `labels == 0` / `labels == 1`, so an unseeded fit could silently swap the
# clusters between runs. The numbering itself is still arbitrary: re-check
# which cluster is which if the seed or the data changes.
gmm = GaussianMixture(n_components=2, random_state=0).fit(x)
labels = gmm.predict(x)



In [11]:

    
# Same PCA scatter as before, now coloured by the mixture-model cluster label.
pc1, pc2 = x2[:, 0], x2[:, 1]
plt.scatter(pc1, pc2, c=labels, cmap='rainbow')
# Colour bar for reading the label values off the colours.
plt.colorbar()









    Out[11]:





<matplotlib.colorbar.Colorbar at 0x11b022390>



In [12]:

    
# One panel per cluster label: panel 0 shows the days labelled 0, panel 1 the
# days labelled 1, each day drawn as a faint line over time of day.
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

for cluster_id, panel in enumerate(ax):
    # Columns of `pivoted` are days, so a boolean mask over the labels picks
    # the days belonging to this cluster.
    pivoted.loc[:, labels == cluster_id].plot(legend=False, alpha=0.1, ax=panel)

# Titles name the colour each label gets in the 'rainbow' scatter plot.
ax[0].set_title('Purple Cluster')
ax[1].set_title('Red Cluster');



In [13]:

    
##COMPARING WITH DAY OF WEEK



In [14]:

    
# Day-of-week number for every day (column) of `pivoted`; pandas counts
# Monday as 0 through Sunday as 6.
day_index = pd.DatetimeIndex(pivoted.columns)
dayofweek = day_index.dayofweek



In [15]:

    
# PCA scatter coloured by day of week instead of cluster label, to compare the
# unsupervised clusters with the calendar.
first_pc = x2[:, 0]
second_pc = x2[:, 1]
plt.scatter(first_pc, second_pc, c=dayofweek, cmap='rainbow')
plt.colorbar();



In [16]:

    
##ANALYZING OUTLIERS



In [17]:

    
# List the Monday-Friday days (dayofweek 0-4) that the mixture model put in
# cluster 1, i.e. weekdays that did not cluster with the other weekdays in
# the recorded run.
dates = pd.DatetimeIndex(pivoted.columns)
is_weekday = dayofweek < 5
in_cluster_one = labels == 1
dates[in_cluster_one & is_weekday]









    Out[17]:





DatetimeIndex(['2012-11-22', '2012-11-23', '2012-12-24', '2012-12-25',
               '2013-01-01', '2013-05-27', '2013-07-04', '2013-07-05',
               '2013-09-02', '2013-11-28', '2013-11-29', '2013-12-20',
               '2013-12-24', '2013-12-25', '2014-01-01', '2014-04-23',
               '2014-05-26', '2014-07-04', '2014-09-01', '2014-11-27',
               '2014-11-28', '2014-12-24', '2014-12-25', '2014-12-26',
               '2015-01-01', '2015-05-25', '2015-07-03', '2015-09-07',
               '2015-11-26', '2015-11-27', '2015-12-24', '2015-12-25',
               '2016-01-01', '2016-05-30', '2016-07-04', '2016-09-05',
               '2016-11-24', '2016-11-25', '2016-12-26', '2017-01-02',
               '2017-02-06', '2017-05-29'],
              dtype='datetime64[ns]', freq=None)



In [ ]: