Time Series Data


In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Working with Datetime Objects


In [2]:
from datetime import datetime

In [3]:
my_year = 2017
my_month = 10
my_day = 14
my_hour = 15
my_minute = 30
my_second = 15

The Datetime Object


In [4]:
my_date = datetime(my_year, my_month, my_day)

In [5]:
my_date


Out[5]:
datetime.datetime(2017, 10, 14, 0, 0)

In [6]:
my_date_time = datetime(my_year, my_month, my_day, my_hour, my_minute, my_second)

In [7]:
my_date_time


Out[7]:
datetime.datetime(2017, 10, 14, 15, 30, 15)

In [8]:
type(my_date_time)


Out[8]:
datetime.datetime

In [9]:
my_date_time.day


Out[9]:
14

In [10]:
my_date_time.month


Out[10]:
10

Making a datetime indexed dataframe


In [11]:
first_two = [datetime(2016, 1, 1), datetime(2016, 1, 2)]

In [12]:
first_two


Out[12]:
[datetime.datetime(2016, 1, 1, 0, 0), datetime.datetime(2016, 1, 2, 0, 0)]

In [13]:
type(first_two)


Out[13]:
list

In [14]:
type(first_two[0])


Out[14]:
datetime.datetime

In [15]:
dt_ind = pd.DatetimeIndex(first_two)

In [17]:
dt_ind


Out[17]:
DatetimeIndex(['2016-01-01', '2016-01-02'], dtype='datetime64[ns]', freq=None)

In [18]:
data = np.random.randn(2, 2)

In [19]:
cols = ["a", "b"]

In [20]:
df = pd.DataFrame(data, dt_ind, cols)

In [21]:
df


Out[21]:
a b
2016-01-01 0.578400 -1.105279
2016-01-02 -0.106859 -0.116752

In [22]:
df.index.argmax()


Out[22]:
1

In [23]:
df.index.argmin()


Out[23]:
0

In [24]:
df.index.min()


Out[24]:
Timestamp('2016-01-01 00:00:00')

In [25]:
type(df.index.min())


Out[25]:
pandas._libs.tslib.Timestamp

Time Resampling


In [27]:
df = pd.read_csv("time_data/walmart_stock.csv")

In [28]:
df.head()


Out[28]:
Date Open High Low Close Volume Adj Close
0 2012-01-03 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
1 2012-01-04 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2 2012-01-05 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
3 2012-01-06 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
4 2012-01-09 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

In [30]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 7 columns):
Date         1258 non-null object
Open         1258 non-null float64
High         1258 non-null float64
Low          1258 non-null float64
Close        1258 non-null float64
Volume       1258 non-null int64
Adj Close    1258 non-null float64
dtypes: float64(5), int64(1), object(1)
memory usage: 68.9+ KB

In [33]:
df["Date"] = pd.to_datetime(df["Date"]) # Be aware of formatting!!

In [34]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 7 columns):
Date         1258 non-null datetime64[ns]
Open         1258 non-null float64
High         1258 non-null float64
Low          1258 non-null float64
Close        1258 non-null float64
Volume       1258 non-null int64
Adj Close    1258 non-null float64
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 68.9 KB

In [35]:
df.set_index("Date", inplace=True)

In [37]:
df.head()


Out[37]:
Open High Low Close Volume Adj Close
Date
2012-01-03 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-01-04 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-01-05 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-01-06 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-01-09 59.029999 59.549999 58.919998 59.180000 6679300 51.616215
Quicker (but less controlled) way:

In [41]:
df2 = pd.read_csv("time_data/walmart_stock.csv", index_col="Date", parse_dates=True)

In [42]:
df2.head()


Out[42]:
Open High Low Close Volume Adj Close
Date
2012-01-03 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-01-04 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-01-05 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-01-06 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-01-09 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

In [47]:
df2.index


Out[47]:
DatetimeIndex(['2012-01-03', '2012-01-04', '2012-01-05', '2012-01-06',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-17',
               ...
               '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
               '2016-12-22', '2016-12-23', '2016-12-27', '2016-12-28',
               '2016-12-29', '2016-12-30'],
              dtype='datetime64[ns]', name='Date', length=1258, freq=None)

In [46]:
type(df2.index[0])


Out[46]:
pandas._libs.tslib.Timestamp

Resampling


In [49]:
df.resample(rule="A")


Out[49]:
DatetimeIndexResampler [freq=<YearEnd: month=12>, axis=0, closed=right, label=right, convention=start, base=0]
Alias Description
B business day frequency
C custom business day frequency (experimental)
D calendar day frequency
W weekly frequency
M month end frequency
SM semi-month end frequency (15th and end of month)
BM business month end frequency
CBM custom business month end frequency
MS month start frequency
SMS semi-month start frequency (1st and 15th)
BMS business month start frequency
CBMS custom business month start frequency
Q quarter end frequency
BQ business quarter endfrequency
QS quarter start frequency
BQS business quarter start frequency
A year end frequency
BA business year end frequency
AS year start frequency
BAS business year start frequency
BH business hour frequency
H hourly frequency
T, min minutely frequency
S secondly frequency
L, ms milliseconds
U, us microseconds
N nanoseconds

In [51]:
df.resample(rule="A").mean()


Out[51]:
Open High Low Close Volume Adj Close
Date
2012-12-31 67.158680 67.602120 66.786520 67.215120 9.239015e+06 59.389349
2013-12-31 75.264048 75.729405 74.843055 75.320516 6.951496e+06 68.147179
2014-12-31 77.274524 77.740040 76.864405 77.327381 6.515612e+06 71.709712
2015-12-31 72.569405 73.064167 72.034802 72.491111 9.040769e+06 68.831426
2016-12-31 69.481349 70.019643 69.023492 69.547063 9.371645e+06 68.054229

In [52]:
df.resample(rule="BQ").mean()


Out[52]:
Open High Low Close Volume Adj Close
Date
2012-03-30 60.462903 60.807258 60.160484 60.519516 8.850221e+06 52.881839
2012-06-29 62.888889 63.400159 62.592222 63.057143 1.155795e+07 55.594392
2012-09-28 73.081587 73.549682 72.717619 73.174921 7.871587e+06 64.891949
2012-12-31 72.174678 72.623226 71.647741 72.079839 8.660958e+06 64.161672
2013-03-29 70.898834 71.393000 70.552666 71.026000 8.482003e+06 63.581880
2013-06-28 76.873906 77.417813 76.413281 76.885781 7.656086e+06 69.393600
2013-09-30 75.637969 76.030469 75.213594 75.629063 5.796625e+06 68.676662
2013-12-31 77.372656 77.805313 76.924531 77.472813 5.966927e+06 70.651242
2014-03-31 75.339344 75.802787 74.863278 75.337213 7.050402e+06 69.106983
2014-06-30 77.036508 77.391111 76.689524 77.066508 6.048765e+06 71.284646
2014-09-30 75.674843 76.062501 75.375938 75.748593 5.550031e+06 70.520795
2014-12-31 80.952969 81.607500 80.432344 81.059844 7.431025e+06 75.797780
2015-03-31 85.096229 85.699017 84.491803 85.107213 7.209713e+06 79.994746
2015-06-30 76.678730 77.066191 76.191111 76.492540 7.481765e+06 72.501821
2015-09-30 68.884375 69.366094 68.328594 68.825313 8.883412e+06 65.666534
2015-12-31 60.269688 60.780156 59.776563 60.193281 1.247800e+07 57.743235
2016-03-31 65.564754 66.346065 65.015574 65.776557 1.198041e+07 63.580834
2016-06-30 69.350000 69.857656 68.918906 69.438437 8.751062e+06 67.762903
2016-09-30 72.808594 73.230938 72.371719 72.782812 8.236991e+06 71.530300
2016-12-30 70.026985 70.478889 69.609048 70.021111 8.628798e+06 69.150314

In [53]:
df.resample(rule="A").max()


Out[53]:
Open High Low Close Volume Adj Close
Date
2012-12-31 77.599998 77.599998 76.690002 77.150002 38007300 68.568371
2013-12-31 81.209999 81.370003 80.820000 81.209999 25683700 73.929868
2014-12-31 87.080002 88.089996 86.480003 87.540001 22812400 81.707680
2015-12-31 90.800003 90.970001 89.250000 90.470001 80898100 84.914216
2016-12-31 74.500000 75.190002 73.629997 74.300003 35076700 73.233524

In [54]:
def first_day(entry):
    return entry[0]

In [55]:
df.resample("A").apply(first_day)


Out[55]:
Open High Low Close Volume Adj Close
Date
2012-12-31 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2013-12-31 68.930000 69.239998 68.449997 69.239998 10390800 61.879708
2014-12-31 78.720001 79.470001 78.500000 78.910004 6878000 72.254228
2015-12-31 86.269997 86.720001 85.550003 85.900002 4501800 80.624861
2016-12-31 60.500000 61.490002 60.360001 61.459999 11989200 59.289713

In [56]:
df["Close"].resample("A").mean().plot(kind="bar")


Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fabd9639048>

In [59]:
df["Close"].resample("M").mean().plot(kind="bar", figsize=(16, 6))


Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fabd59bbf60>

Time Shifts


In [1]:
import pandas as pd

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df = pd.read_csv("time_data/walmart_stock.csv", index_col="Date", parse_dates=True)

In [4]:
df.head()


Out[4]:
Open High Low Close Volume Adj Close
Date
2012-01-03 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-01-04 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-01-05 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-01-06 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-01-09 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

In [5]:
df.tail()


Out[5]:
Open High Low Close Volume Adj Close
Date
2016-12-23 69.430000 69.750000 69.360001 69.540001 4803900 69.032411
2016-12-27 69.300003 69.820000 69.250000 69.699997 4435700 69.191240
2016-12-28 69.940002 70.000000 69.260002 69.309998 4875700 68.804087
2016-12-29 69.209999 69.519997 69.120003 69.260002 4298400 68.754456
2016-12-30 69.120003 69.430000 68.830002 69.120003 6889500 68.615479
Shift up by one

In [15]:
df.shift(periods=1).head() # first gets filled in by NaN


Out[15]:
Open High Low Close Volume Adj Close
Date
2012-01-03 NaN NaN NaN NaN NaN NaN
2012-01-04 59.970001 61.060001 59.869999 60.330002 12668800.0 52.619235
2012-01-05 60.209999 60.349998 59.470001 59.709999 9593300.0 52.078475
2012-01-06 59.349998 59.619999 58.369999 59.419998 12768200.0 51.825539
2012-01-09 59.419998 59.450001 58.869999 59.000000 8069400.0 51.459220

In [14]:
df.shift(periods=-1).tail() # last gets filled by NaN


Out[14]:
Open High Low Close Volume Adj Close
Date
2016-12-23 69.300003 69.820000 69.250000 69.699997 4435700.0 69.191240
2016-12-27 69.940002 70.000000 69.260002 69.309998 4875700.0 68.804087
2016-12-28 69.209999 69.519997 69.120003 69.260002 4298400.0 68.754456
2016-12-29 69.120003 69.430000 68.830002 69.120003 6889500.0 68.615479
2016-12-30 NaN NaN NaN NaN NaN NaN

In [13]:
df.head()


Out[13]:
Open High Low Close Volume Adj Close
Date
2012-01-03 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-01-04 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-01-05 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-01-06 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-01-09 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

In [16]:
df.tshift(freq="M").head() # shifts to end of month


Out[16]:
Open High Low Close Volume Adj Close
Date
2012-01-31 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-01-31 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-01-31 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-01-31 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-01-31 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

In [17]:
df.shift(freq="A").head() # shifts to end of year


Out[17]:
Open High Low Close Volume Adj Close
Date
2012-12-31 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-12-31 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-12-31 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-12-31 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-12-31 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

Rolling and Expanding


In [18]:
df.head()


Out[18]:
Open High Low Close Volume Adj Close
Date
2012-01-03 59.970001 61.060001 59.869999 60.330002 12668800 52.619235
2012-01-04 60.209999 60.349998 59.470001 59.709999 9593300 52.078475
2012-01-05 59.349998 59.619999 58.369999 59.419998 12768200 51.825539
2012-01-06 59.419998 59.450001 58.869999 59.000000 8069400 51.459220
2012-01-09 59.029999 59.549999 58.919998 59.180000 6679300 51.616215

In [19]:
df["Open"].plot(figsize=(16, 6))


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fcf154680b8>

In [21]:
df.rolling(window=7).mean().head(20) # not inplace!


Out[21]:
Open High Low Close Volume Adj Close
Date
2012-01-03 NaN NaN NaN NaN NaN NaN
2012-01-04 NaN NaN NaN NaN NaN NaN
2012-01-05 NaN NaN NaN NaN NaN NaN
2012-01-06 NaN NaN NaN NaN NaN NaN
2012-01-09 NaN NaN NaN NaN NaN NaN
2012-01-10 NaN NaN NaN NaN NaN NaN
2012-01-11 59.495714 59.895714 59.074285 59.440000 9.007414e+06 51.842984
2012-01-12 59.469999 59.744285 59.007143 59.321429 8.231357e+06 51.739567
2012-01-13 59.322857 59.638571 58.941428 59.297143 7.965071e+06 51.718386
2012-01-17 59.397143 59.708571 59.105714 59.358572 7.355329e+06 51.771963
2012-01-18 59.450000 59.791428 59.217143 59.502857 7.047043e+06 51.897808
2012-01-19 59.578572 59.960000 59.335715 59.707143 7.412086e+06 52.075984
2012-01-20 59.767143 60.180000 59.577143 59.988571 7.908014e+06 52.321443
2012-01-23 60.017143 60.387143 59.787143 60.204285 8.017800e+06 52.509586
2012-01-24 60.154286 60.672857 59.979999 60.474285 8.035857e+06 52.745077
2012-01-25 60.440000 60.958572 60.270000 60.749999 7.776786e+06 52.985553
2012-01-26 60.715714 61.205714 60.448571 60.910000 7.624814e+06 53.125103
2012-01-27 60.868572 61.361429 60.575714 61.010000 7.678514e+06 53.212323
2012-01-30 60.945715 61.445714 60.661428 61.108571 7.450271e+06 53.298295
2012-01-31 61.057143 61.491429 60.648571 61.158571 7.362086e+06 53.341905

In [25]:
df["Open"].plot()
df.rolling(window=7).mean()["Close"].plot(figsize=(16, 6))
df.rolling(window=14).mean()["Close"].plot(figsize=(16, 6))
df.rolling(window=28).mean()["Close"].plot(figsize=(16, 6))


Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fcf11f6b6a0>

In [26]:
df["Close 30 Day MA"] = df["Close"].rolling(window=30).mean()
df[["Close 30 Day MA", "Close"]].plot(figsize=(16, 6))


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fcf103f47b8>

In [29]:
df["Close"].expanding().mean().plot(figsize=(16, 6))


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fcf100560f0>

Bollinger Bands

determining if the price is high or not


In [30]:
# Close 20 MA
df["Close: 20 Day Mean"] = df["Close"].rolling(20).mean()

# Upper = 20MA + 2 * std(20)
df["Upper"] = df["Close: 20 Day Mean"] + 2 * (df["Close"].rolling(20).std())

# Lower = 20MA - 2 * std(20)
df["Lower"] = df["Close: 20 Day Mean"] - 2 * (df["Close"].rolling(20).std())

# Close
df[["Close", "Close: 20 Day Mean", "Upper", "Lower"]].plot(figsize=(16, 6))


Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fcf10220400>