In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from jupyterworkflow.data import get_fremont_data
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
In [2]:
# Load the raw frame, then reshape it so that each row is a time of day and
# each column is a calendar date; cells hold the 'Total' value for that slot.
data = get_fremont_data()
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# One nearly transparent line per date, so overlapping days build up visible bands.
pivoted.plot(alpha=0.01, legend=False)
Out[2]:
In [3]:
# Peek at the first 24 entries of the pivot's row index (built from data.index.time).
pivoted.index[:24]
Out[3]:
In [4]:
# Display the index of the raw frame returned by get_fremont_data().
data.index
Out[4]:
In [5]:
# List the distinct time-of-day values that occur in the raw index.
np.unique(data.index.time)
Out[5]:
In [6]:
# Shell escape: print the first lines of fremont.csv from the working directory.
# NOTE(review): presumably this is the file get_fremont_data() reads - confirm.
!head fremont.csv
In [7]:
# Dimensions of the pivot: rows are times of day, columns are dates.
pivoted.shape
Out[7]:
In [8]:
# Build the feature matrix: transpose so each row is one date and each column
# a time of day, replace missing readings with 0, and drop to a plain array.
X = pivoted.transpose().fillna(value=0).values
X.shape
Out[8]:
In [9]:
from sklearn.decomposition import PCA

# Project every row of X onto its first two principal components,
# using the exact (full) SVD solver.
X2 = PCA(n_components=2, svd_solver='full').fit_transform(X)
In [10]:
# Shape of the PCA projection: one row per row of X, two components each.
X2.shape
Out[10]:
In [11]:
# Scatter the two principal components against each other, one point per row of X2.
# pyplot is already imported as `plt` in the notebook's first cell, so the
# redundant re-import has been dropped from this cell.
plt.scatter(X2[:, 0], X2[:, 1])
Out[11]:
In [ ]:
In [12]:
# Fit a two-component Gaussian mixture on the full feature matrix and assign
# each row of X to one of the two components.
#
# random_state is fixed because GaussianMixture initialises randomly: without
# a seed the fit can differ between runs and the meaning of labels 0 and 1 can
# swap. Later cells hard-code which label is which (the cluster plot titles
# and the `labels == 1` filter), so the assignment must be repeatable.
gmm = GaussianMixture(n_components=2, random_state=0).fit(X)
labels = gmm.predict(X)
labels
Out[12]:
In [ ]:
In [13]:
# Same 2-D projection as before, now coloured by the mixture-model label.
plt.scatter(x=X2[:, 0], y=X2[:, 1], c=labels, cmap='rainbow')
plt.colorbar()
Out[13]:
In [14]:
# Side-by-side panels: the daily curves of each cluster, drawn as nearly
# transparent lines. Columns of `pivoted` are selected by cluster label.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# Left panel: dates assigned label 0.
pivoted.loc[:, labels == 0].plot(ax=ax[0], alpha=0.01, legend=False)
ax[0].set_title('Purple Cluster')

# Right panel: dates assigned label 1.
pivoted.loc[:, labels == 1].plot(ax=ax[1], alpha=0.01, legend=False)
ax[1].set_title('Red Cluster')
Out[14]:
In [15]:
# Colour the projection by day of week, derived from the pivot's date columns,
# to compare against the cluster colouring.
dayofweek = pd.DatetimeIndex(pivoted.columns).dayofweek
plt.scatter(x=X2[:, 0], y=X2[:, 1], c=dayofweek, cmap='rainbow')
plt.colorbar()
Out[15]:
In [16]:
# Dates that received cluster label 1 even though their day-of-week value is
# below 5 (in pandas, Monday=0 .. Friday=4, i.e. working days).
dates = pd.DatetimeIndex(pivoted.columns)
dates[np.logical_and(labels == 1, dayofweek < 5)]
Out[16]:
In [ ]: