In [53]:
%matplotlib inline
import pandas as pd
from jupyterworkflow.data import get_fremont_data
# Fetch the dataset through the project helper get_fremont_data().
data = get_fremont_data()

# Reshape to one row per time of day and one column per calendar date, holding
# the "Total" values (pivot_table aggregates duplicates with the mean by default).
pivoted = data.pivot_table(
    values="Total",
    index=data.index.time,
    columns=data.index.date,
)

# Overlay every date as a nearly transparent line, without a legend.
pivoted.plot(alpha=0.01, legend=False)
Out[53]:
In [54]:
# Show the row index of the pivot table (built from data.index.time).
pivoted.index
Out[54]:
In [55]:
# Show the index of the frame returned by get_fremont_data().
data.index
Out[55]:
Strange! The index contains only 12 distinct hours, not 24.
In [56]:
import numpy as np
# List the distinct time-of-day values that occur in the index.
np.unique(data.index.time)
Out[56]:
There's an issue with the time formatting: only 12 distinct hours appear instead of 24.
In [57]:
# Show the first 24 lines of Fremont.csv (shell command run through IPython's `!`).
!head -24 Fremont.csv
In [58]:
# Replace missing values with 0, then transpose so that each row of the array
# corresponds to one column (date) of the pivot table.
pivoted_filled = pivoted.fillna(0)
x = pivoted_filled.transpose().values
x.shape
Out[58]:
In [59]:
from sklearn.decomposition import PCA
# Project every row of x onto its first two principal components.
pca = PCA(n_components=2, svd_solver="full")
x2 = pca.fit_transform(x)
In [60]:
# Shape of the PCA output: one row per row of x, two components.
x2.shape
Out[60]:
In [61]:
import matplotlib.pyplot as plt
# Scatter the two PCA components against each other (x2 has exactly two columns).
first_component, second_component = x2.T
plt.scatter(first_component, second_component)
Out[61]:
In [62]:
from sklearn.mixture import GaussianMixture
# Fit a two-component Gaussian mixture to the rows of x (one row per date) and
# assign each row to its most likely component.
# random_state is pinned because the mixture's initialisation is random:
# without a seed, which cluster is numbered 0 and which is numbered 1 can change
# from run to run, while later cells select rows with `labels == 0` and
# `labels == 1`.
gmm = GaussianMixture(2, random_state=0)
gmm.fit(x)
labels = gmm.predict(x)
labels
Out[62]:
In [63]:
# PCA scatter with each point coloured by its mixture-cluster label.
plt.scatter(x=x2[:, 0], y=x2[:, 1], c=labels)
plt.colorbar()
Out[63]:
In [64]:
# PCA scatter coloured by mixture-cluster label, drawn with the "rainbow" colormap.
pc_first, pc_second = x2[:, 0], x2[:, 1]
plt.scatter(pc_first, pc_second, cmap="rainbow", c=labels)
plt.colorbar()
Out[64]:
Let's examine what's going on inside these clusters.
In [65]:
# Plot only the dates assigned to cluster 0: select those columns directly
# rather than transposing, filtering rows, and transposing back.
pivoted.loc[:, labels == 0].plot(alpha=0.01, legend=False)
Out[65]:
In [66]:
# Plot only the dates assigned to cluster 1: select those columns directly
# rather than transposing, filtering rows, and transposing back.
pivoted.loc[:, labels == 1].plot(alpha=0.01, legend=False)
Out[66]:
In [67]:
# Day of week for each column (date) of the pivot table; pandas numbers
# Monday as 0 and Sunday as 6.
pd.DatetimeIndex(pivoted.columns).dayofweek
Out[67]:
In [68]:
# Weekday number (Monday = 0) for every date column of the pivot table.
column_dates = pd.DatetimeIndex(pivoted.columns)
dayofweek = column_dates.dayofweek
In [70]:
# PCA scatter with each point coloured by its day of week.
plt.scatter(*x2.T, c=dayofweek, cmap="rainbow")
plt.colorbar()
Out[70]:
In [81]:
# Collect the pivot-table column dates, then keep those that fall in mixture
# cluster 0 and have dayofweek < 1.
# NOTE(review): pandas numbers Monday as 0, so `dayofweek < 1` keeps Mondays
# only; if every weekday was intended the bound would be 5 — confirm.
dates = pd.DatetimeIndex(pivoted.columns)
in_cluster_0 = labels == 0
early_in_week = dayofweek < 1
dates[in_cluster_0 & early_in_week]
Out[81]:
In [ ]: