In [ ]:
"""
----------------------------------------------------------------------
Filename : 04_time_series_in_pandas.py
Date : 12th Dec, 2013
Author : Jaidev Deshpande
Purpose : Introduction to time series analysis and plotting in Pandas
Libraries: Pandas, NumPy, Matplotlib
----------------------------------------------------------------------
"""
In [ ]:
# standard library imports
from datetime import date
# third-party library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [ ]:
def create_timestamp_sample():
    """
    Create a Pandas `Series` of random data indexed by calendar day.

    The index is built with `pd.date_range` from 1st January 2000 to
    31st December 2013, and one value per timestamp is drawn with
    `np.random.randn`. The random generator is not seeded here, so the
    values differ between calls.

    Returns
    -------
    pd.Series
        Random floats indexed by the generated timestamps.
    """
    # ISO 8601 strings are used because '1/1/2000' and '31/12/2013' mix an
    # ambiguous month-first form with a day-first form that only parses
    # through the date parser's fallback.
    times = pd.date_range(start='2000-01-01', end='2013-12-31')
    data = np.random.randn(len(times))
    return pd.Series(data, index=times)
In [ ]:
# Build the sample series that the following cells index, slice and resample
ts = create_timestamp_sample()
In [ ]:
# Look up an entry of the series using a date string as the key
print(ts['1/1/2000'])
In [ ]:
# Create a date object and use it to look up an entry of the series.
# Recent pandas versions raise KeyError when a DatetimeIndex is indexed with
# a plain `datetime.date`, so the key is converted to a Timestamp first.
dt = date(2000,1,1)
print(ts[pd.Timestamp(dt)])
In [ ]:
# Slicing a timeseries by dates.
# ISO 8601 strings are used so the bounds do not depend on whether the
# parser reads 'd/m/yyyy' or 'm/d/yyyy'.
print(ts['2000-01-01':'2000-01-31'])
In [ ]:
# Changing the frequency of a time series: keep one value per month end.
# A MonthEnd offset object is passed instead of the 'M' string alias, which
# newer pandas releases deprecate; the object form works across versions.
converted_s = ts.asfreq(pd.offsets.MonthEnd(), method='bfill')
# Size the figure when it is created, as the other plotting cells do
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(20, 10))
ts.plot(ax=axes[0])
axes[0].set_title('Daily, for 14 years')
converted_s.plot(ax=axes[1])
axes[1].set_title('Monthly, for 14 years')
plt.show()
In [ ]:
def series_interpolation(n_sample):
    """
    Build a sine wave Series in which only evenly spaced samples are kept.

    A sine wave is evaluated at 1000 points over [-4*pi, 4*pi]. Positions
    `0, 1000/n_sample, 2*1000/n_sample, ...` (truncated to integers) keep
    their sine value; every other position is NaN, ready to be filled by
    `Series.interpolate`.

    Parameters
    ----------
    n_sample : int or float
        Number of evenly spaced samples to keep. Must be non-zero, since
        the sampling step is computed as `1000.0 / n_sample`.

    Returns
    -------
    pd.Series
        Series of length 1000 with a default integer index.
    """
    n_points = 1000
    x = np.linspace(-4*np.pi, 4*np.pi, n_points)
    y = np.sin(x)
    samples = np.arange(0, n_points, float(n_points)/n_sample).astype(int)
    # Start from an all-NaN buffer rather than zeros-then-mask: using 0 as
    # the "missing" marker would also erase a sampled value that is 0.0.
    y_sampled = np.full((n_points,), np.nan)
    y_sampled[samples] = y[samples]
    return pd.Series(y_sampled)
In [ ]:
# Series interpolation demo: one subplot per sample size, with the retained
# samples drawn as red dots and the interpolated curve as a blue line
sample_sizes = [10,25,50,75,100]
sampled_data = list(map(series_interpolation, sample_sizes))
fig, axes = plt.subplots(nrows=len(sample_sizes), ncols=1, figsize=(20,10))
for i, data in enumerate(sampled_data):
    ax = axes[i]
    # Plot only the points that survived the sampling
    known_points = data[data.notnull()]
    known_points.plot(style='ro', ax=ax)
    # Overlay the series with the gaps filled in
    interpolated = data.interpolate()
    interpolated.plot(style='b-', ax=ax)
    ax.set_title("Sample Size = "+str(sample_sizes[i]))
plt.show()
In [ ]:
# Upsampling and downsampling
print(ts.index.freq)
# In current pandas `resample` returns a Resampler object and no longer
# accepts `fill_method`; the fill or aggregation is chosen by calling a
# method on the result. Offset objects replace the '12H' and 'M' string
# aliases, which newer pandas releases deprecate.
# Upsample to two points per day, back-filling the new half-day slots
upsampled = ts.resample(pd.offsets.Hour(12)).bfill()
# Downsample to one point per month; the mean is requested explicitly
# because a bare Resampler has no values (and no `.shape`) of its own
downsampled = ts.resample(pd.offsets.MonthEnd()).mean()
print(upsampled.shape)
print(downsampled.shape)
fig, axes = plt.subplots(3,1, figsize=(20,10))
ts.plot(ax=axes[0],title='Original Data')
upsampled.plot(ax=axes[1],title='Upsampled Data')
downsampled.plot(ax=axes[2], title='Downsampled Data')
plt.show()
Exercise: read the stock_px.csv file in the data folder into a DataFrame, using the timestamp column as index.
In [ ]:
def create_interpolation_exercise():
    """
    Create a 1000-element Series of NaNs with random values scattered in it.

    NumPy's global random generator is seeded with 0, so every call returns
    the same Series (and leaves the global generator in the same state).
    100 positions are drawn with `np.random.randint`; because positions may
    repeat, at most 100 entries hold a value and the rest are NaN.

    Returns
    -------
    pd.Series
        Series of length 1000 with a default integer index.
    """
    # The bare names `random`, `zeros` and `nan` are never imported in this
    # file, so every NumPy name is qualified with `np.`.
    np.random.seed(0)
    inds = np.random.randint(low=0, high=1000, size=(100,))
    values = np.random.randn(100)
    # Start from NaN instead of zeros-then-mask so a drawn value of exactly
    # 0.0 could not be mistaken for a missing entry.
    x = np.full((1000,), np.nan)
    x[inds] = values
    return pd.Series(x)
Use create_interpolation_exercise to create a Series randomly filled with NaNs.