In [ ]:
## Unsupervised Analysis of Days of the Week
In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
# The bare 'seaborn' style sheet was renamed to 'seaborn-v0_8' in matplotlib 3.6
# and the old name was removed in 3.8. Prefer the new name when it exists and
# fall back to the legacy one so the notebook runs on old and new matplotlib.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
In [2]:
## GET DATA
In [3]:
# Load the dataset through the project-local helper. Later cells rely on the
# result having a 'Total' column and an index exposing `.time` and `.date`.
# NOTE(review): presumably the Fremont Bridge bicycle counts with a
# DatetimeIndex -- confirm against packages/data.py.
from packages.data import get_fremont_data
data = get_fremont_data()
In [4]:
# Reshape so that each column is one calendar date and each row one time of
# day, holding the aggregated 'Total' for that slot (pivot_table aggregates
# with the mean by default).
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# Overlay every day as a nearly transparent line so recurring shapes stand out.
pivoted.plot(alpha=0.01, legend=False)
Out[4]:
In [5]:
## PRINCIPAL COMPONENT ANALYSIS
In [6]:
# Build the sample matrix: one row per date, one column per time of day.
# Slots with no reading are filled with 0 so the estimators below accept them.
x = pivoted.T.fillna(0).values
x.shape
Out[6]:
In [7]:
# Reduce every day's profile to its first two principal components so the
# days can be drawn on a plane.
x2 = PCA(n_components=2, svd_solver='full').fit_transform(x)
x2.shape
Out[7]:
In [8]:
# One point per day in the 2-D PCA projection.
plt.scatter(x=x2[:, 0], y=x2[:, 1])
Out[8]:
In [9]:
## UNSUPERVISED CLUSTERING
In [10]:
# Fit a two-component Gaussian mixture on the full daily profiles and assign
# each day to its most likely component.
# The fit starts from a random initialisation, so without a fixed seed the
# component ids (0/1) can swap between runs. Later cells hard-code which id is
# which (panel titles, `labels == 1`), so pin the seed for reproducibility.
# If the seed is ever changed, re-check which id corresponds to which cluster.
gmm = GaussianMixture(n_components=2, random_state=0).fit(x)
labels = gmm.predict(x)
In [11]:
# Same PCA projection, now coloured by the mixture-model cluster id.
plt.scatter(
    x=x2[:, 0],
    y=x2[:, 1],
    c=labels,
    cmap='rainbow',
)
plt.colorbar()
Out[11]:
In [12]:
# Two side-by-side panels showing the daily profiles of each cluster.
# The titles name the colours that labels 0 and 1 receive from the 'rainbow'
# colormap in the cluster scatter plot (low end purple, high end red).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# Left panel: days assigned to cluster 0 (columns of `pivoted` are days).
pivoted.loc[:, labels == 0].plot(ax=ax[0], alpha=0.1, legend=False)
ax[0].set_title('Purple Cluster')

# Right panel: days assigned to cluster 1.
pivoted.loc[:, labels == 1].plot(ax=ax[1], alpha=0.1, legend=False)
ax[1].set_title('Red Cluster');
In [13]:
## COMPARING WITH DAY OF WEEK
In [14]:
# Weekday number for every column (day) of `pivoted`: 0 = Monday ... 6 = Sunday.
dayofweek = pd.DatetimeIndex(pivoted.columns).weekday
In [15]:
# PCA projection coloured by weekday number, for comparison with the clusters.
plt.scatter(
    x=x2[:, 0],
    y=x2[:, 1],
    c=dayofweek,
    cmap='rainbow',
)
plt.colorbar();
In [16]:
## ANALYZING OUTLIERS
In [17]:
# List the days that fall on Monday-Friday (dayofweek < 5) but were assigned
# to cluster 1.
# NOTE(review): this assumes label 1 is the weekend-like cluster; mixture
# component ids are arbitrary, so confirm against the cluster plots.
dates = pd.DatetimeIndex(pivoted.columns)
dates[np.logical_and(labels == 1, dayofweek < 5)]
Out[17]:
In [ ]: