Time Series Data



In [26]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Working with Datetime Objects



In [2]:

    
from datetime import datetime



In [3]:

    
my_year = 2017
my_month = 10
my_day = 14
my_hour = 15
my_minute = 30
my_second = 15

The Datetime Object



In [4]:

    
my_date = datetime(my_year, my_month, my_day)



In [5]:

    
my_date









    Out[5]:





datetime.datetime(2017, 10, 14, 0, 0)



In [6]:

    
my_date_time = datetime(my_year, my_month, my_day, my_hour, my_minute, my_second)



In [7]:

    
my_date_time









    Out[7]:





datetime.datetime(2017, 10, 14, 15, 30, 15)



In [8]:

    
type(my_date_time)









    Out[8]:





datetime.datetime



In [9]:

    
my_date_time.day









    Out[9]:





14



In [10]:

    
my_date_time.month









    Out[10]:





10

Making a datetime indexed dataframe



In [11]:

    
first_two = [datetime(2016, 1, 1), datetime(2016, 1, 2)]



In [12]:

    
first_two









    Out[12]:





[datetime.datetime(2016, 1, 1, 0, 0), datetime.datetime(2016, 1, 2, 0, 0)]



In [13]:

    
type(first_two)









    Out[13]:





list



In [14]:

    
type(first_two[0])









    Out[14]:





datetime.datetime



In [15]:

    
dt_ind = pd.DatetimeIndex(first_two)



In [17]:

    
dt_ind









    Out[17]:





DatetimeIndex(['2016-01-01', '2016-01-02'], dtype='datetime64[ns]', freq=None)



In [18]:

    
data = np.random.randn(2, 2)



In [19]:

    
cols = ["a", "b"]



In [20]:

    
df = pd.DataFrame(data, dt_ind, cols)



In [21]:

    
df



In [22]:

    
df.index.argmax()









    Out[22]:





1



In [23]:

    
df.index.argmin()









    Out[23]:





0



In [24]:

    
df.index.min()









    Out[24]:





Timestamp('2016-01-01 00:00:00')



In [25]:

    
type(df.index.min())









    Out[25]:





pandas._libs.tslib.Timestamp

Time Resampling



In [27]:

    
df = pd.read_csv("time_data/walmart_stock.csv")



In [28]:

    
df.head()



In [30]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 7 columns):
Date         1258 non-null object
Open         1258 non-null float64
High         1258 non-null float64
Low          1258 non-null float64
Close        1258 non-null float64
Volume       1258 non-null int64
Adj Close    1258 non-null float64
dtypes: float64(5), int64(1), object(1)
memory usage: 68.9+ KB



In [33]:

    
df["Date"] = pd.to_datetime(df["Date"]) # Be aware of formatting!!



In [34]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 7 columns):
Date         1258 non-null datetime64[ns]
Open         1258 non-null float64
High         1258 non-null float64
Low          1258 non-null float64
Close        1258 non-null float64
Volume       1258 non-null int64
Adj Close    1258 non-null float64
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 68.9 KB



In [35]:

    
df.set_index("Date", inplace=True)



In [37]:

    
df.head()

Quicker (but less controlled) way:



In [41]:

    
df2 = pd.read_csv("time_data/walmart_stock.csv", index_col="Date", parse_dates=True)



In [42]:

    
df2.head()



In [47]:

    
df2.index









    Out[47]:





DatetimeIndex(['2012-01-03', '2012-01-04', '2012-01-05', '2012-01-06',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-17',
               ...
               '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
               '2016-12-22', '2016-12-23', '2016-12-27', '2016-12-28',
               '2016-12-29', '2016-12-30'],
              dtype='datetime64[ns]', name='Date', length=1258, freq=None)



In [46]:

    
type(df2.index[0])









    Out[46]:





pandas._libs.tslib.Timestamp

Resampling



In [49]:

    
df.resample(rule="A")









    Out[49]:





DatetimeIndexResampler [freq=<YearEnd: month=12>, axis=0, closed=right, label=right, convention=start, base=0]

Alias	Description
B	business day frequency
C	custom business day frequency (experimental)
D	calendar day frequency
W	weekly frequency
M	month end frequency
SM	semi-month end frequency (15th and end of month)
BM	business month end frequency
CBM	custom business month end frequency
MS	month start frequency
SMS	semi-month start frequency (1st and 15th)
BMS	business month start frequency
CBMS	custom business month start frequency
Q	quarter end frequency
BQ	business quarter endfrequency
QS	quarter start frequency
BQS	business quarter start frequency
A	year end frequency
BA	business year end frequency
AS	year start frequency
BAS	business year start frequency
BH	business hour frequency
H	hourly frequency
T, min	minutely frequency
S	secondly frequency
L, ms	milliseconds
U, us	microseconds
N	nanoseconds



In [51]:

    
df.resample(rule="A").mean()









    Out[51]:







  
    
      
      Open
      High
      Low
      Close
      Volume
      Adj Close
    
    
      Date
      
      
      
      
      
      
    
  
  
    
      2012-12-31
      67.158680
      67.602120
      66.786520
      67.215120
      9.239015e+06
      59.389349
    
    
      2013-12-31
      75.264048
      75.729405
      74.843055
      75.320516
      6.951496e+06
      68.147179
    
    
      2014-12-31
      77.274524
      77.740040
      76.864405
      77.327381
      6.515612e+06
      71.709712
    
    
      2015-12-31
      72.569405
      73.064167
      72.034802
      72.491111
      9.040769e+06
      68.831426
    
    
      2016-12-31
      69.481349
      70.019643
      69.023492
      69.547063
      9.371645e+06
      68.054229



In [52]:

    
df.resample(rule="BQ").mean()









    Out[52]:







  
    
      
      Open
      High
      Low
      Close
      Volume
      Adj Close
    
    
      Date
      
      
      
      
      
      
    
  
  
    
      2012-03-30
      60.462903
      60.807258
      60.160484
      60.519516
      8.850221e+06
      52.881839
    
    
      2012-06-29
      62.888889
      63.400159
      62.592222
      63.057143
      1.155795e+07
      55.594392
    
    
      2012-09-28
      73.081587
      73.549682
      72.717619
      73.174921
      7.871587e+06
      64.891949
    
    
      2012-12-31
      72.174678
      72.623226
      71.647741
      72.079839
      8.660958e+06
      64.161672
    
    
      2013-03-29
      70.898834
      71.393000
      70.552666
      71.026000
      8.482003e+06
      63.581880
    
    
      2013-06-28
      76.873906
      77.417813
      76.413281
      76.885781
      7.656086e+06
      69.393600
    
    
      2013-09-30
      75.637969
      76.030469
      75.213594
      75.629063
      5.796625e+06
      68.676662
    
    
      2013-12-31
      77.372656
      77.805313
      76.924531
      77.472813
      5.966927e+06
      70.651242
    
    
      2014-03-31
      75.339344
      75.802787
      74.863278
      75.337213
      7.050402e+06
      69.106983
    
    
      2014-06-30
      77.036508
      77.391111
      76.689524
      77.066508
      6.048765e+06
      71.284646
    
    
      2014-09-30
      75.674843
      76.062501
      75.375938
      75.748593
      5.550031e+06
      70.520795
    
    
      2014-12-31
      80.952969
      81.607500
      80.432344
      81.059844
      7.431025e+06
      75.797780
    
    
      2015-03-31
      85.096229
      85.699017
      84.491803
      85.107213
      7.209713e+06
      79.994746
    
    
      2015-06-30
      76.678730
      77.066191
      76.191111
      76.492540
      7.481765e+06
      72.501821
    
    
      2015-09-30
      68.884375
      69.366094
      68.328594
      68.825313
      8.883412e+06
      65.666534
    
    
      2015-12-31
      60.269688
      60.780156
      59.776563
      60.193281
      1.247800e+07
      57.743235
    
    
      2016-03-31
      65.564754
      66.346065
      65.015574
      65.776557
      1.198041e+07
      63.580834
    
    
      2016-06-30
      69.350000
      69.857656
      68.918906
      69.438437
      8.751062e+06
      67.762903
    
    
      2016-09-30
      72.808594
      73.230938
      72.371719
      72.782812
      8.236991e+06
      71.530300
    
    
      2016-12-30
      70.026985
      70.478889
      69.609048
      70.021111
      8.628798e+06
      69.150314



In [53]:

    
df.resample(rule="A").max()



In [54]:

    
def first_day(entry):
    return entry[0]



In [55]:

    
df.resample("A").apply(first_day)



In [56]:

    
df["Close"].resample("A").mean().plot(kind="bar")









    Out[56]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fabd9639048>



In [59]:

    
df["Close"].resample("M").mean().plot(kind="bar", figsize=(16, 6))









    Out[59]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fabd59bbf60>

Time Shifts



In [1]:

    
import pandas as pd



In [2]:

    
import matplotlib.pyplot as plt
%matplotlib inline



In [3]:

    
df = pd.read_csv("time_data/walmart_stock.csv", index_col="Date", parse_dates=True)



In [4]:

    
df.head()



In [5]:

    
df.tail()

Shift up by one



In [15]:

    
df.shift(periods=1).head() # first gets filled in by NaN



In [14]:

    
df.shift(periods=-1).tail() # last gets filled by NaN



In [13]:

    
df.head()



In [16]:

    
df.tshift(freq="M").head() # shifts to end of month



In [17]:

    
df.shift(freq="A").head() # shifts to end of year

Rolling and Expanding



In [18]:

    
df.head()



In [19]:

    
df["Open"].plot(figsize=(16, 6))









    Out[19]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fcf154680b8>



In [21]:

    
df.rolling(window=7).mean().head(20) # not inplace!









    Out[21]:







  
    
      
      Open
      High
      Low
      Close
      Volume
      Adj Close
    
    
      Date
      
      
      
      
      
      
    
  
  
    
      2012-01-03
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2012-01-04
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2012-01-05
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2012-01-06
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2012-01-09
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2012-01-10
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2012-01-11
      59.495714
      59.895714
      59.074285
      59.440000
      9.007414e+06
      51.842984
    
    
      2012-01-12
      59.469999
      59.744285
      59.007143
      59.321429
      8.231357e+06
      51.739567
    
    
      2012-01-13
      59.322857
      59.638571
      58.941428
      59.297143
      7.965071e+06
      51.718386
    
    
      2012-01-17
      59.397143
      59.708571
      59.105714
      59.358572
      7.355329e+06
      51.771963
    
    
      2012-01-18
      59.450000
      59.791428
      59.217143
      59.502857
      7.047043e+06
      51.897808
    
    
      2012-01-19
      59.578572
      59.960000
      59.335715
      59.707143
      7.412086e+06
      52.075984
    
    
      2012-01-20
      59.767143
      60.180000
      59.577143
      59.988571
      7.908014e+06
      52.321443
    
    
      2012-01-23
      60.017143
      60.387143
      59.787143
      60.204285
      8.017800e+06
      52.509586
    
    
      2012-01-24
      60.154286
      60.672857
      59.979999
      60.474285
      8.035857e+06
      52.745077
    
    
      2012-01-25
      60.440000
      60.958572
      60.270000
      60.749999
      7.776786e+06
      52.985553
    
    
      2012-01-26
      60.715714
      61.205714
      60.448571
      60.910000
      7.624814e+06
      53.125103
    
    
      2012-01-27
      60.868572
      61.361429
      60.575714
      61.010000
      7.678514e+06
      53.212323
    
    
      2012-01-30
      60.945715
      61.445714
      60.661428
      61.108571
      7.450271e+06
      53.298295
    
    
      2012-01-31
      61.057143
      61.491429
      60.648571
      61.158571
      7.362086e+06
      53.341905



In [25]:

    
df["Open"].plot()
df.rolling(window=7).mean()["Close"].plot(figsize=(16, 6))
df.rolling(window=14).mean()["Close"].plot(figsize=(16, 6))
df.rolling(window=28).mean()["Close"].plot(figsize=(16, 6))









    Out[25]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fcf11f6b6a0>



In [26]:

    
df["Close 30 Day MA"] = df["Close"].rolling(window=30).mean()
df[["Close 30 Day MA", "Close"]].plot(figsize=(16, 6))









    Out[26]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fcf103f47b8>



In [29]:

    
df["Close"].expanding().mean().plot(figsize=(16, 6))









    Out[29]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fcf100560f0>

Bollinger Bands

determining if the price is high or not



In [30]:

    
# Close 20 MA
df["Close: 20 Day Mean"] = df["Close"].rolling(20).mean()

# Upper = 20MA + 2 * std(20)
df["Upper"] = df["Close: 20 Day Mean"] + 2 * (df["Close"].rolling(20).std())

# Lower = 20MA - 2 * std(20)
df["Lower"] = df["Close: 20 Day Mean"] - 2 * (df["Close"].rolling(20).std())

# Close
df[["Close", "Close: 20 Day Mean", "Upper", "Lower"]].plot(figsize=(16, 6))









    Out[30]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fcf10220400>

	a	b
2016-01-01	0.578400	-1.105279
2016-01-02	-0.106859	-0.116752

	Date	Open	High	Low	Close	Volume	Adj Close
0	2012-01-03	59.970001	61.060001	59.869999	60.330002	12668800	52.619235
1	2012-01-04	60.209999	60.349998	59.470001	59.709999	9593300	52.078475
2	2012-01-05	59.349998	59.619999	58.369999	59.419998	12768200	51.825539
3	2012-01-06	59.419998	59.450001	58.869999	59.000000	8069400	51.459220
4	2012-01-09	59.029999	59.549999	58.919998	59.180000	6679300	51.616215

	Open	High	Low	Close	Volume	Adj Close
Date
2012-01-03	59.970001	61.060001	59.869999	60.330002	12668800	52.619235
2012-01-04	60.209999	60.349998	59.470001	59.709999	9593300	52.078475
2012-01-05	59.349998	59.619999	58.369999	59.419998	12768200	51.825539
2012-01-06	59.419998	59.450001	58.869999	59.000000	8069400	51.459220
2012-01-09	59.029999	59.549999	58.919998	59.180000	6679300	51.616215

	Open	High	Low	Close	Volume	Adj Close
Date
2012-01-03	59.970001	61.060001	59.869999	60.330002	12668800	52.619235
2012-01-04	60.209999	60.349998	59.470001	59.709999	9593300	52.078475
2012-01-05	59.349998	59.619999	58.369999	59.419998	12768200	51.825539
2012-01-06	59.419998	59.450001	58.869999	59.000000	8069400	51.459220
2012-01-09	59.029999	59.549999	58.919998	59.180000	6679300	51.616215

	Open	High	Low	Close	Volume	Adj Close
Date
2012-12-31	77.599998	77.599998	76.690002	77.150002	38007300	68.568371
2013-12-31	81.209999	81.370003	80.820000	81.209999	25683700	73.929868
2014-12-31	87.080002	88.089996	86.480003	87.540001	22812400	81.707680
2015-12-31	90.800003	90.970001	89.250000	90.470001	80898100	84.914216
2016-12-31	74.500000	75.190002	73.629997	74.300003	35076700	73.233524

	Open	High	Low	Close	Volume	Adj Close
Date
2012-12-31	67.158680	67.602120	66.786520	67.215120	9.239015e+06	59.389349
2013-12-31	75.264048	75.729405	74.843055	75.320516	6.951496e+06	68.147179
2014-12-31	77.274524	77.740040	76.864405	77.327381	6.515612e+06	71.709712
2015-12-31	72.569405	73.064167	72.034802	72.491111	9.040769e+06	68.831426
2016-12-31	69.481349	70.019643	69.023492	69.547063	9.371645e+06	68.054229

	Open	High	Low	Close	Volume	Adj Close
Date
2012-03-30	60.462903	60.807258	60.160484	60.519516	8.850221e+06	52.881839
2012-06-29	62.888889	63.400159	62.592222	63.057143	1.155795e+07	55.594392
2012-09-28	73.081587	73.549682	72.717619	73.174921	7.871587e+06	64.891949
2012-12-31	72.174678	72.623226	71.647741	72.079839	8.660958e+06	64.161672
2013-03-29	70.898834	71.393000	70.552666	71.026000	8.482003e+06	63.581880
2013-06-28	76.873906	77.417813	76.413281	76.885781	7.656086e+06	69.393600
2013-09-30	75.637969	76.030469	75.213594	75.629063	5.796625e+06	68.676662
2013-12-31	77.372656	77.805313	76.924531	77.472813	5.966927e+06	70.651242
2014-03-31	75.339344	75.802787	74.863278	75.337213	7.050402e+06	69.106983
2014-06-30	77.036508	77.391111	76.689524	77.066508	6.048765e+06	71.284646
2014-09-30	75.674843	76.062501	75.375938	75.748593	5.550031e+06	70.520795
2014-12-31	80.952969	81.607500	80.432344	81.059844	7.431025e+06	75.797780
2015-03-31	85.096229	85.699017	84.491803	85.107213	7.209713e+06	79.994746
2015-06-30	76.678730	77.066191	76.191111	76.492540	7.481765e+06	72.501821
2015-09-30	68.884375	69.366094	68.328594	68.825313	8.883412e+06	65.666534
2015-12-31	60.269688	60.780156	59.776563	60.193281	1.247800e+07	57.743235
2016-03-31	65.564754	66.346065	65.015574	65.776557	1.198041e+07	63.580834
2016-06-30	69.350000	69.857656	68.918906	69.438437	8.751062e+06	67.762903
2016-09-30	72.808594	73.230938	72.371719	72.782812	8.236991e+06	71.530300
2016-12-30	70.026985	70.478889	69.609048	70.021111	8.628798e+06	69.150314

	Open	High	Low	Close	Volume	Adj Close
Date
2016-12-23	69.430000	69.750000	69.360001	69.540001	4803900	69.032411
2016-12-27	69.300003	69.820000	69.250000	69.699997	4435700	69.191240
2016-12-28	69.940002	70.000000	69.260002	69.309998	4875700	68.804087
2016-12-29	69.209999	69.519997	69.120003	69.260002	4298400	68.754456
2016-12-30	69.120003	69.430000	68.830002	69.120003	6889500	68.615479

	Open	High	Low	Close	Volume	Adj Close
Date
2012-01-31	59.970001	61.060001	59.869999	60.330002	12668800	52.619235
2012-01-31	60.209999	60.349998	59.470001	59.709999	9593300	52.078475
2012-01-31	59.349998	59.619999	58.369999	59.419998	12768200	51.825539
2012-01-31	59.419998	59.450001	58.869999	59.000000	8069400	51.459220
2012-01-31	59.029999	59.549999	58.919998	59.180000	6679300	51.616215