Exploratory analyses of travel-time data collected using https://github.com/amadeuspzs/travelTime/blob/master/travelTime.py
In [7]:
%matplotlib inline
import pandas as pd, matplotlib.pyplot as plt, matplotlib.dates as dates, math
from datetime import datetime
from utils import find_weeks, find_days # custom
from pytz import timezone
from detect_peaks import detect_peaks
from ipywidgets import interact, interactive, fixed, interact_manual
In [8]:
# Timezone used further down to localize the epoch timestamps and hour ticks.
tz = timezone('US/Eastern')

# Read the raw CSV log into a DataFrame.
filename = 'data/home-montauk.csv'
data = pd.read_csv(filename)

# Preview the first rows (head() defaults to 5).
data.head()
Out[8]:
Convert the Unix timestamps to timezone-aware datetimes (US/Eastern):
In [9]:
# Convert epoch seconds to tz-aware datetimes in one vectorized pass instead
# of calling datetime.fromtimestamp() row by row through DataFrame.apply.
# The column is overwritten in place, so guard against re-running the cell:
# dtype kind 'M' means the column already holds datetimes and is left alone.
if data['Timestamp'].dtype.kind != 'M':
    data['Timestamp'] = (
        # astype('int64') truncates fractional seconds, like the int() it replaces.
        pd.to_datetime(data['Timestamp'].astype('int64'), unit='s', utc=True)
        .dt.tz_convert(tz)  # shift from UTC to the notebook's local timezone
    )
data.head(5)
Out[9]:
Add a new column with the duration in hours
In [10]:
# Derive the duration in hours from the duration in seconds.
# Plain column arithmetic is vectorized, so no row-wise apply/lambda is needed;
# astype(float) mirrors the float() coercion the per-row version performed.
data['Duration(h)'] = data['Duration(s)'].astype(float) / (60 * 60)
data.head(5)
Out[10]:
Let's have a quick visualization:
In [11]:
# Quick-look line chart of the whole series: duration (hours) against time.
ax = data.plot(y='Duration(h)', x='Timestamp', kind='line')
In [23]:
# Small multiples: one panel per week, all sharing the same y-range so the
# panels can be compared by eye.
weeks = find_weeks(data)
num_cols = 2
num_rows = int(math.ceil(len(weeks) / float(num_cols)))

# Slice every week exactly once and reuse the slice for both the y-limits and
# the plot. Previously the limits used data[start:end+1] while the plot used
# data[start:end], so each panel silently dropped its last sample.
# NOTE(review): assumes find_weeks returns (start, end) row positions with an
# inclusive end, as the other `+1` slices in this notebook imply -- confirm
# against utils.find_weeks.
week_frames = [data.iloc[week[0]:week[1] + 1] for week in weeks]

ylim = [min(frame['Duration(h)'].min() for frame in week_frames),
        max(frame['Duration(h)'].max() for frame in week_frames)]

plt.figure(1, figsize=(14, 7))
for e, week_frame in enumerate(week_frames):
    ax = plt.subplot(num_rows, num_cols, e + 1)  # subplot indices are 1-based
    week_frame.plot(x='Timestamp', y='Duration(h)', ax=ax)
    ax.grid()
    ax.set_ylim(ylim)
plt.tight_layout()
In [22]:
# Small multiples: one panel per selected weekday, sharing a common y-range,
# with the x-axis labelled by hour of day in the local timezone.
days = find_days(data, 5)  # Friday
num_cols = 3
# Size the grid from `days`. This used len(weeks), a copy-paste slip that made
# the layout depend on a variable from a different cell and could produce too
# few (or too many) rows.
num_rows = int(math.ceil(len(days) / float(num_cols)))

# Slice every day exactly once and reuse the slice for both the y-limits and
# the plot. Previously the limits used data[start:end+1] while the plot used
# data[start:end], so each panel dropped its last sample.
# NOTE(review): assumes find_days returns (start, end) row positions with an
# inclusive end -- confirm against utils.find_days.
day_frames = [data.iloc[day[0]:day[1] + 1] for day in days]

ylim = [min(frame['Duration(h)'].min() for frame in day_frames),
        max(frame['Duration(h)'].max() for frame in day_frames)]

plt.figure(1, figsize=(14, 7))
for e, day_frame in enumerate(day_frames):
    ax = plt.subplot(num_rows, num_cols, e + 1)  # subplot indices are 1-based
    day_frame.plot(x='Timestamp', y='Duration(h)', ax=ax)
    # One tick per hour, labelled with the hour in the notebook's timezone.
    ax.xaxis.set_major_formatter(dates.DateFormatter('%H', tz))
    ax.xaxis.set_major_locator(dates.HourLocator(interval=1))
    ax.grid()
    ax.set_ylim(ylim)
plt.tight_layout()
In [19]:
week = find_weeks(data)[2]  # choose one week
week_data = data[week[0]:week[1]+1]


@interact(mpd=50, mph=1.0)
def peaks(mpd, mph):
    """Interactively detect peaks in the chosen week's duration series.

    Runs detect_peaks on week_data['Duration(h)'] with the slider values and
    prints the weekday and time ("%a %H:%M") of every index it returns.

    mpd, mph: forwarded unchanged to detect_peaks (presumably minimum peak
        distance in samples and minimum peak height -- confirm against the
        detect_peaks helper).
    """
    indexes = detect_peaks(week_data['Duration(h)'], mpd=mpd, mph=mph, show=True)
    for index in indexes:
        # detect_peaks indexes are positions within the series, hence .iloc.
        # print(...) with a single argument behaves the same on Python 2 and 3,
        # unlike the bare print statement it replaces.
        print(week_data['Timestamp'].iloc[index].strftime("%a %H:%M"))
In [21]:
@interact(mpd=130)
def peaks(mpd):
    """Interactively detect valleys in the chosen week's duration series.

    Runs detect_peaks with valley=True on week_data['Duration(h)'] and prints
    the weekday and time ("%a %H:%M") of every index it returns.

    Relies on `week_data` from the preceding cell and reuses (shadows) the
    name `peaks` defined there.

    mpd: forwarded unchanged to detect_peaks (presumably the minimum distance
        in samples between detections -- confirm against the helper).
    """
    indexes = detect_peaks(week_data['Duration(h)'], valley=True, mpd=mpd, show=True)
    for index in indexes:
        # detect_peaks indexes are positions within the series, hence .iloc.
        # print(...) with a single argument behaves the same on Python 2 and 3,
        # unlike the bare print statement it replaces.
        print(week_data['Timestamp'].iloc[index].strftime("%a %H:%M"))