In [1]:
from __future__ import print_function
import os
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
In [26]:
# Load the chemical concentration readings from CSV into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer='datasets/chemical-concentration-readings.csv')
In [27]:
# Report the (rows, columns) shape of the loaded dataset
print('Shape of the dataset: {}'.format(df.shape))
In [28]:
# Preview the first 10 rows of the DataFrame
df.head(n=10)
Out[28]:
In [29]:
#The observations seem to be taken at an interval of 2 hours
In [30]:
# Parse the 'Timestamp' column into datetimes and use them as the row index.
# pd.to_datetime is applied to the whole column at once rather than through a
# per-row lambda; the explicit format is the same one used before.
datetime_rowid = pd.to_datetime(df['Timestamp'], format='%Y-%m-%d %H:%M:%S')
df.index = datetime_rowid
df.head(10)
Out[30]:
In [38]:
# Group the concentration readings into calendar-day bins ('D') and average each bin
daily = df['Chemical conc.'].resample(rule='D')
daily_mean = daily.mean()
In [39]:
# Overlay the raw readings (blue) and their daily mean (red) on a single axes,
# then write the figure to disk as a 300 dpi PNG
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5.5, 5.5))
df['Chemical conc.'].plot(ax=ax, color='b')
daily_mean.plot(ax=ax, color='r')
ax.set_title('Bi-hourly reading (blue) & Daily Mean (red)')
ax.set_xlabel('Days in Jan 1975')
ax.set_ylabel('Chemical concentration')
fig.savefig('plots/ch2/B07887_02_02.png', format='png', dpi=300)
In [11]:
"""
Let us show an example of grouping by a period
"""
Out[11]:
In [12]:
# Load the mean daily temperature data and re-index the rows by date.
# The 'Date' column is kept as loaded; only the index becomes datetime64.
# pd.to_datetime parses the whole column in one vectorised call instead of
# being invoked once per row through a lambda.
df = pd.read_csv('datasets/mean-daily-temperature-fisher-river.csv')
df.index = pd.to_datetime(df['Date'], format='%Y-%m-%d')
In [13]:
# Report the (rows, columns) shape of the temperature DataFrame
print('Shape of dataframe: {}'.format(df.shape))
In [14]:
# Preview the first 10 rows of the temperature data
df.head(n=10)
Out[14]:
In [15]:
# Plot the original daily mean temperature series and save it as a 300 dpi PNG.
# Note: 'Mean temparature' is the column name used in the DataFrame, so it is left as is.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5.5, 5.5))
df['Mean temparature'].plot(ax=ax, color='b')
ax.set_title('Mean daily temparature')
fig.savefig('plots/ch2/B07887_02_03.png', format='png', dpi=300)
In [16]:
# We need to group the data by month and compute aggregate statistics.
# Start by adding a 'Month_Year' label column of the form 'MM-YYYY'.
# DatetimeIndex.strftime formats the whole index in one vectorised call
# instead of mapping a Python lambda over every timestamp.
df['Month_Year'] = df.index.strftime('%m-%Y')
df.head(10)
Out[16]:
In [17]:
# Calculate month-wise mean, median and standard deviation of the daily temperature.
# The aggregations are given by name: passing np.mean / np.median / np.std to
# GroupBy.aggregate is deprecated in recent pandas, and the string names pin the
# pandas implementations (in particular 'std' is the sample std, ddof=1).
# The resulting columns are still called 'mean', 'median' and 'std'.
monthly_stats = (
    df.groupby(by='Month_Year')['Mean temparature']
    .aggregate(['mean', 'median', 'std'])
    .reset_index()  # turn the Month_Year group key back into a regular column
)
monthly_stats.head(10)
Out[17]:
In [18]:
# Create zero-padded 'Year' and 'Month' string columns and sort by them so the
# rows are in chronological order (the 'MM-YYYY' labels alone sort by month first).
# The labels are parsed once, as a whole column, and both parts are formatted
# from that single parsed Series.
month_start = pd.to_datetime(monthly_stats['Month_Year'], format='%m-%Y')
monthly_stats['Year'] = month_start.dt.strftime('%Y')
monthly_stats['Month'] = month_start.dt.strftime('%m')
monthly_stats = monthly_stats.sort_values(by=['Year', 'Month'])
monthly_stats.head(10)
Out[18]:
In [19]:
# Use the Month_Year labels as the row index while keeping the column itself
monthly_stats = monthly_stats.set_index('Month_Year', drop=False)
In [20]:
# Plot the monthly mean (blue) and monthly standard deviation (red) on one axes
# and save the figure as a 300 dpi PNG
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5.5, 5.5))
monthly_stats['mean'].plot(ax=ax, color='b')
monthly_stats['std'].plot(ax=ax, color='r')
ax.set_title('Monthly statistics: Mean (blue) & Std. Dev. (red)')
fig.savefig('plots/ch2/B07887_02_04.png', format='png', dpi=300)
In [21]:
# Weekly moving average: mean over a 7-observation rolling window of the daily series
weekly_moving_average = df['Mean temparature'].rolling(window=7).mean()
In [22]:
# Monthly moving average: mean over a 30-observation rolling window of the daily series
monthly_moving_average = df['Mean temparature'].rolling(window=30).mean()
In [23]:
# Calculate the weekly and monthly moving averages with a stride of length 2,
# i.e. keep every second value (by position) of each rolling mean
weekly_moving_average_2stride = df['Mean temparature'].rolling(window=7).mean().iloc[::2]
monthly_moving_average_2stride = df['Mean temparature'].rolling(window=30).mean().iloc[::2]
In [24]:
# Plot the original daily series together with its weekly and monthly moving
# averages in three stacked panels sharing the x-axis, then save as a 300 dpi PNG
fig, axarr = plt.subplots(3, sharex=True)
# Width and height are both 5.5 inches, matching the other figures in this notebook.
# (A comma in place of the decimal point would pass 5 as the height and a stray
# third positional argument.)
fig.set_size_inches(5.5, 5.5)
df['Mean temparature'].plot(ax=axarr[0], color='b')
axarr[0].set_title('Daily mean temparature')
weekly_moving_average.plot(ax=axarr[1], color='r')
axarr[1].set_title('Weekly moving average')
monthly_moving_average.plot(ax=axarr[2], color='g')
axarr[2].set_title('Monthly moving average')
plt.savefig('plots/ch2/B07887_02_05.png', format='png', dpi=300)