In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn')
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
In [2]:
# Load the dataset through the project-local helper module.
from jupyterworkflow.data import get_fremont_data
# NOTE(review): the next cell reads `data.index.time`, `data.index.date` and a
# 'Total' column, so this is presumably a DataFrame with a datetime-like
# index -- confirm against get_fremont_data.
data = get_fremont_data()
In [3]:
# Reshape to one row per time of day and one column per calendar date; each
# cell holds 'Total' aggregated with pivot_table's default aggregation.
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# Overlay every date as a nearly transparent line so dense regions stand out.
pivoted.plot(alpha=0.01, legend=False);
In [4]:
# Transpose so each row is one date and each column one time of day; missing
# entries are replaced with 0 before handing the matrix to scikit-learn.
X = pivoted.fillna(0).T.values
# Reduce every row to its first two principal components for plotting.
X2 = PCA(n_components=2, svd_solver='full').fit_transform(X)
# Only the last expression of a cell is displayed, so a bare `X.shape` in the
# middle of the cell was evaluated and discarded. Show both shapes together.
X.shape, X2.shape
Out[4]:
In [5]:
# Each point is one row of X2: first component on x, second on y.
plt.scatter(x=X2[:, 0], y=X2[:, 1]);
In [6]:
# Fit a two-component Gaussian mixture on the full matrix X (not on the 2-D
# projection X2) and assign every row to one of the two components.
# random_state pins the otherwise randomly seeded initialisation so that a
# fresh "Restart & Run All" reproduces the same clustering. Which component
# ends up numbered 0 and which 1 is still arbitrary.
gmm = GaussianMixture(n_components=2, random_state=0)
gmm.fit(X)
labels = gmm.predict(X)
In [7]:
# Same 2-D projection, with each point coloured by its mixture-model label.
plt.scatter(
    x=X2[:, 0],
    y=X2[:, 1],
    c=labels,
    cmap='rainbow',
)
plt.colorbar();
In [8]:
# Side-by-side panels: one per mixture component, each overlaying the daily
# curves of the dates assigned to that component.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# Left panel: dates labelled 0. The titles presumably name the colours the
# 'rainbow' colormap gives labels 0 and 1 in the label-coloured scatter.
pivoted.loc[:, labels == 0].plot(ax=ax[0], alpha=0.01, legend=False)
ax[0].set_title('Purple Cluster')

# Right panel: dates labelled 1.
pivoted.loc[:, labels == 1].plot(ax=ax[1], alpha=0.01, legend=False)
ax[1].set_title('Red Cluster');
In [9]:
# Day-of-week number for every column (date) of `pivoted`; pandas counts
# Monday as 0 through Sunday as 6. `.weekday` is an alias of `.dayofweek`.
dayofweek = pd.DatetimeIndex(data=pivoted.columns).weekday
In [10]:
# Same 2-D projection, with each point coloured by its day-of-week number.
plt.scatter(
    x=X2[:, 0],
    y=X2[:, 1],
    c=dayofweek,
    cmap='rainbow',
)
plt.colorbar();
In [11]:
# The mixture model numbers its components arbitrarily, so work out which
# label plays which role instead of hard-coding it. Heuristic used here: the
# cluster that contains the single largest value in `pivoted` is treated as
# the weekday cluster, the other one as the weekend cluster.
weekday_label = 0
weekend_label = 1
if pivoted.T[labels == 1].max().max() > pivoted.T[labels == 0].max().max():
    # The body must be indented under the `if`; without the indentation the
    # cell is a syntax error and neither assignment is conditional.
    weekday_label = 1
    weekend_label = 0
In [12]:
dates = pd.DatetimeIndex(data=pivoted.columns)
# Dates that fall on Monday-Friday (dayofweek 0-4) yet were assigned to the
# cluster identified as the weekend one.
dates[(dayofweek < 5) & (labels == weekend_label)]
Out[12]:
In [ ]: