Relevant resources:
Coding time:
Relevant resources:
Fremont Bridge Bike Counter: the website where you can explore the data
A Whirlwind Tour of Python: Jake Vanderplas book introducing the Python programming language, aimed at scientists and engineers.
Python Data Science Handbook: Jake Vanderplas book introducing Python's data science tools, including an introduction to the IPython, Pandas, and Matplotlib tools used here.
In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd
pd.set_option('max_columns', None)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
import seaborn as sns
sns.set_style('whitegrid',{'font.sans-serif':['SimHei','Arial']})
In [1]:
from jupyterworkflow.data import get_fremont_data

# Load the bike-counter data through the project helper and preview the
# first five rows.
data = get_fremont_data()
data.head(5)
Out[1]:
In [6]:
# First look at the data as loaded: one line per column.
data.plot(kind='line');
In [9]:
# Sum the counts into weekly bins before plotting.
weekly_totals = data.resample('W').sum()
weekly_totals.plot();
In [10]:
# Sum the counts into monthly bins before plotting.
monthly_totals = data.resample('M').sum()
monthly_totals.plot();
Relevant Resources:
In [11]:
# Weekly totals again, as the starting point for the smoother views below.
per_week = data.resample('W').sum()
per_week.plot();
In [13]:
# Daily totals first, then a rolling sum over a 365-row window, so each point
# is the total of the trailing 365 daily values.
daily_totals = data.resample('D').sum()
trailing_year = daily_totals.rolling(365).sum()
trailing_year.plot();
In [14]:
# Same trailing-365-day sum as before, but keep the Axes so the y-axis can be
# anchored at zero (the upper limit stays automatic).
rolling_annual = data.resample('D').sum().rolling(365).sum()
ax = rolling_annual.plot()
ax.set_ylim(bottom=0);
In [17]:
# Average every column by time of day, pooling all dates together.
time_of_day = data.index.time
mean_by_time = data.groupby(time_of_day).mean()
mean_by_time.plot();
In [18]:
# Reshape 'Total' into a table with one row per time of day and one column per
# calendar date (pivot_table aggregates with the mean by default).
time_of_day = data.index.time
calendar_day = data.index.date
pivoted = data.pivot_table(values='Total', index=time_of_day, columns=calendar_day)

# Peek at the top-left corner only; the full table is very wide.
pivoted.iloc[0:5, 0:3]
Out[18]:
In [19]:
# Overlay every date as its own nearly transparent line; no legend, since
# there is one entry per date.
pivoted.plot(alpha=0.01, legend=False);
对一周不同时段(工作日、节假日和双休日)的骑车人数进行聚类分析
In [20]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-dark')
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
In [21]:
from jupyterworkflow.data import get_fremont_data

# Reload the data and rebuild the time-of-day x date table of mean 'Total'
# values, then overlay every date as a faint line.
data = get_fremont_data()
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
pivoted.plot(alpha=0.01, legend=False);
In [22]:
# Replace missing slots with 0, then transpose so that each row of X is one
# date and each column one time-of-day slot.
filled = pivoted.fillna(0)
X = filled.T.values
X.shape
Out[22]:
In [23]:
# Project each day's profile onto its first two principal components using
# the full SVD solver.
pca = PCA(n_components=2, svd_solver='full')
X2 = pca.fit_transform(X)
In [24]:
# Sanity check: one row per day, two principal components per row.
n_days, n_components = X2.shape
(n_days, n_components)
Out[24]:
In [25]:
# Scatter the days in the plane of the two principal components.
pc1, pc2 = X2.T
plt.scatter(pc1, pc2);
In [26]:
# Fit a two-component Gaussian mixture to X and assign every row to a
# component. The fit is initialised randomly, so pin the seed: without it the
# labels 0 and 1 can swap between runs, which silently changes the meaning of
# the `labels == 0` / `labels == 1` selections made further down.
gmm = GaussianMixture(2, random_state=0).fit(X)
labels = gmm.predict(X)
labels
Out[26]:
In [8]:
# Same projection, now coloured by the mixture-model label of each day.
pc1, pc2 = X2.T
plt.scatter(pc1, pc2, c=labels, cmap='rainbow')
plt.colorbar();
In [27]:
# Draw the daily curves of each cluster in its own panel. The panel names
# refer to the two ends of the 'rainbow' colormap used for the labelled
# scatter plot (label 0 at the purple end, label 1 at the red end).
fig, ax = plt.subplots(1, 2, figsize=(14, 4))

# Select the date columns of each cluster directly instead of transposing the
# table twice.
pivoted.loc[:, labels == 0].plot(legend=False, alpha=0.1, ax=ax[0])
pivoted.loc[:, labels == 1].plot(legend=False, alpha=0.1, ax=ax[1])

ax[0].set_title('Purple Cluster')
ax[1].set_title('Red Cluster');
In [28]:
# Day-of-week number (Monday=0 ... Sunday=6) for every date column of the
# pivoted table.
day_index = pd.DatetimeIndex(pivoted.columns)
dayofweek = day_index.dayofweek
dayofweek
Out[28]:
下图按星期几对各天的散点着色,用于区分工作日与双休日、节假日的自行车数量模式
In [29]:
# Same projection, coloured by day of week instead of by cluster label.
pc1, pc2 = X2.T
plt.scatter(pc1, pc2, c=dayofweek, cmap='rainbow')
plt.colorbar();
In [30]:
# List the dates that fall on Monday-Friday (dayofweek < 5) yet were assigned
# to cluster 1.
dates = pd.DatetimeIndex(pivoted.columns)
in_cluster_one = labels == 1
is_weekday = dayofweek < 5
dates[in_cluster_one & is_weekday]
Out[30]:
Relevant Resources:
*This post was composed within an IPython notebook; you can view a static version [here](http://nbviewer.ipython.org/url/jakevdp.github.com/downloads/notebooks/JupyterWorkflow.ipynb) or download the full source [here](http://jakevdp.github.com/downloads/notebooks/JupyterWorkflow.ipynb).*