In [1]:
# Jupyter Data Science Workflow

In [2]:
import matplotlib.pyplot as plt

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases removed the old names, so a bare 'seaborn' raises on a
# current install. Use the new name when it is available and fall back to the
# legacy name on older Matplotlib versions.
plt.style.use(
    'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
)

In [3]:
from jupyter_workflow.data import get_fremont_data

In [ ]:


In [4]:
# Load the Fremont dataset through the project helper imported above
# (its implementation lives in jupyter_workflow.data and is not shown here).
data = get_fremont_data()
# Preview the first rows; as the last expression, this is the cell's output.
data.head()


Out[4]:
West East Total
Date
2012-10-03 00:00:00 4.0 9.0 13.0
2012-10-03 01:00:00 4.0 6.0 10.0
2012-10-03 02:00:00 1.0 1.0 2.0
2012-10-03 03:00:00 2.0 3.0 5.0
2012-10-03 04:00:00 6.0 1.0 7.0

In [5]:
# Print a summary of the frame: index range, per-column non-null counts,
# dtypes and memory usage.
data.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 40848 entries, 2012-10-03 00:00:00 to 2017-05-31 23:00:00
Data columns (total 3 columns):
West     40841 non-null float64
East     40841 non-null float64
Total    40841 non-null float64
dtypes: float64(3)
memory usage: 1.2 MB

In [6]:
# NOTE(review): this repeats the previous cell verbatim and prints the same
# summary; it looks like a leftover and is a candidate for removal.
data.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 40848 entries, 2012-10-03 00:00:00 to 2017-05-31 23:00:00
Data columns (total 3 columns):
West     40841 non-null float64
East     40841 non-null float64
Total    40841 non-null float64
dtypes: float64(3)
memory usage: 1.2 MB

In [7]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): notebook-wide setup like this usually belongs in the first cells.
%matplotlib inline
# Plot weekly sums of every column; the trailing semicolon keeps the Axes
# repr returned by .plot() out of the cell output.
data.resample('W').sum().plot();



In [8]:
# Weekly sums of every column, plotted without a trailing semicolon, so the
# Axes object returned by .plot() is echoed as the cell's output.
(
    data
    .resample('W')
    .sum()
    .plot()
)


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x112657128>

In [9]:
# Recompute the combined count from the two directional columns.
# This overwrites the 'Total' column of `data` in place.
data['Total'] = data['West'] + data['East']

# Sum to daily values, then take a 365-row rolling sum over those days,
# i.e. a trailing one-year total for each column.
ax = (
    data
    .resample('D')
    .sum()
    .rolling(365)
    .sum()
    .plot()
)

# Anchor the y-axis at zero and leave the upper limit automatic; the
# resulting (bottom, top) limits are the cell's output.
ax.set_ylim(0, None)


Out[9]:
(0, 1058905.7)

In [10]:
# Preview the first rows again, now that 'Total' was reassigned in the
# previous cell.
data.head()


Out[10]:
West East Total
Date
2012-10-03 00:00:00 4.0 9.0 13.0
2012-10-03 01:00:00 4.0 6.0 10.0
2012-10-03 02:00:00 1.0 1.0 2.0
2012-10-03 03:00:00 2.0 3.0 5.0
2012-10-03 04:00:00 6.0 1.0 7.0

In [11]:
# Echo the whole frame; pandas truncates the display, eliding the middle rows.
# NOTE(review): data.head() / data.tail() would keep the notebook output short.
data


Out[11]:
West East Total
Date
2012-10-03 00:00:00 4.0 9.0 13.0
2012-10-03 01:00:00 4.0 6.0 10.0
2012-10-03 02:00:00 1.0 1.0 2.0
2012-10-03 03:00:00 2.0 3.0 5.0
2012-10-03 04:00:00 6.0 1.0 7.0
2012-10-03 05:00:00 21.0 10.0 31.0
2012-10-03 06:00:00 105.0 50.0 155.0
2012-10-03 07:00:00 257.0 95.0 352.0
2012-10-03 08:00:00 291.0 146.0 437.0
2012-10-03 09:00:00 172.0 104.0 276.0
2012-10-03 10:00:00 72.0 46.0 118.0
2012-10-03 11:00:00 10.0 32.0 42.0
2012-10-03 12:00:00 35.0 41.0 76.0
2012-10-03 13:00:00 42.0 48.0 90.0
2012-10-03 14:00:00 77.0 51.0 128.0
2012-10-03 15:00:00 72.0 92.0 164.0
2012-10-03 16:00:00 133.0 182.0 315.0
2012-10-03 17:00:00 192.0 391.0 583.0
2012-10-03 18:00:00 122.0 258.0 380.0
2012-10-03 19:00:00 59.0 69.0 128.0
2012-10-03 20:00:00 29.0 51.0 80.0
2012-10-03 21:00:00 25.0 38.0 63.0
2012-10-03 22:00:00 24.0 25.0 49.0
2012-10-03 23:00:00 5.0 12.0 17.0
2012-10-04 00:00:00 7.0 11.0 18.0
2012-10-04 01:00:00 3.0 0.0 3.0
2012-10-04 02:00:00 3.0 6.0 9.0
2012-10-04 03:00:00 0.0 3.0 3.0
2012-10-04 04:00:00 7.0 1.0 8.0
2012-10-04 05:00:00 15.0 11.0 26.0
... ... ... ...
2017-05-30 18:00:00 394.0 128.0 522.0
2017-05-30 19:00:00 160.0 52.0 212.0
2017-05-30 20:00:00 96.0 36.0 132.0
2017-05-30 21:00:00 42.0 15.0 57.0
2017-05-30 22:00:00 26.0 11.0 37.0
2017-05-30 23:00:00 5.0 2.0 7.0
2017-05-31 00:00:00 6.0 3.0 9.0
2017-05-31 01:00:00 2.0 1.0 3.0
2017-05-31 02:00:00 1.0 0.0 1.0
2017-05-31 03:00:00 1.0 0.0 1.0
2017-05-31 04:00:00 8.0 5.0 13.0
2017-05-31 05:00:00 45.0 30.0 75.0
2017-05-31 06:00:00 201.0 122.0 323.0
2017-05-31 07:00:00 488.0 293.0 781.0
2017-05-31 08:00:00 592.0 325.0 917.0
2017-05-31 09:00:00 296.0 154.0 450.0
2017-05-31 10:00:00 111.0 58.0 169.0
2017-05-31 11:00:00 102.0 51.0 153.0
2017-05-31 12:00:00 88.0 48.0 136.0
2017-05-31 13:00:00 127.0 61.0 188.0
2017-05-31 14:00:00 119.0 45.0 164.0
2017-05-31 15:00:00 199.0 69.0 268.0
2017-05-31 16:00:00 369.0 106.0 475.0
2017-05-31 17:00:00 721.0 178.0 899.0
2017-05-31 18:00:00 501.0 156.0 657.0
2017-05-31 19:00:00 212.0 83.0 295.0
2017-05-31 20:00:00 140.0 57.0 197.0
2017-05-31 21:00:00 67.0 27.0 94.0
2017-05-31 22:00:00 49.0 26.0 75.0
2017-05-31 23:00:00 48.0 32.0 80.0

40848 rows × 3 columns


In [12]:
# Group the rows by the time-of-day part of the index and plot the mean of
# each column per time slot, aggregated over all dates.
(
    data
    .groupby(data.index.time)
    .mean()
    .plot()
)


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x112796550>

In [13]:
# Reshape 'Total' into a time-of-day x date table: one row per time of day,
# one column per calendar date (pivot_table's default aggregation is the mean).
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)

# Show only the top-left 5x5 corner of the table.
pivoted.iloc[:5, :5]


Out[13]:
2012-10-03 2012-10-04 2012-10-05 2012-10-06 2012-10-07
00:00:00 13.0 18.0 11.0 15.0 11.0
01:00:00 10.0 3.0 8.0 15.0 17.0
02:00:00 2.0 9.0 7.0 9.0 3.0
03:00:00 5.0 3.0 4.0 3.0 6.0
04:00:00 7.0 8.0 9.0 5.0 3.0

In [15]:
# Draw one line per column of `pivoted` (one per date), almost fully
# transparent so that overlapping lines build up visible density. The legend
# is disabled, and the trailing semicolon suppresses the Axes repr.
pivoted.plot(
    alpha=0.01,
    legend=False,
);



In [ ]: