In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import nba_py
sns.set_context('poster')
import plotly.offline as py
import plotly.graph_objs as go
In [2]:
# Enable plotly's offline notebook rendering so py.iplot() figures display inline.
# connected=True is passed so plotly.js is loaded from the web rather than embedded
# (see the plotly.offline docs) -- NOTE(review): this means viewing the plots needs
# network access; confirm that is acceptable.
py.init_notebook_mode(connected=True)
In [3]:
# Load the interim sleep export, which lives in ../data/interim relative to the
# notebook's working directory. The 'shifted_datetime' column becomes a parsed
# datetime index.
data_path = os.path.join(
    os.getcwd(), os.pardir, 'data', 'interim', 'sleep_data.csv'
)
df_sleep = pd.read_csv(
    data_path,
    index_col='shifted_datetime',
    parse_dates=True,
)
In [4]:
# Move every sleep timestamp forward by 12 hours.
# NOTE(review): presumably this undoes the offset implied by the
# 'shifted_datetime' column name -- confirm against the script that wrote the CSV.
# NOTE: this rebinds the index of the existing frame, so re-running this cell
# without reloading df_sleep shifts the timestamps by a further 12 hours.
df_sleep.index = df_sleep.index + pd.Timedelta(hours=12)
In [5]:
# Collapse the sleep records into one row per calendar day by summing each
# column, then replace any remaining NaN with 0.
sleep_day = (
    df_sleep
    .resample('1D')
    .sum()
    .fillna(0)
)
In [6]:
# Load the interim activity export from ../data/interim (relative to the
# notebook's working directory), using the 'datetime' column as a parsed
# datetime index. Note that this reuses the name `data_path`.
data_path = os.path.join(
    os.getcwd(), os.pardir, 'data', 'interim', 'activity_data.csv'
)
df_activity = pd.read_csv(
    data_path,
    index_col='datetime',
    parse_dates=True,
)
In [7]:
# Show which activity metrics are available in the frame.
df_activity.columns
Out[7]:
In [8]:
# Interactive Plotly chart of the 'minutesVeryActive' column over time.
toplot = df_activity['minutesVeryActive']

# A single Scatter trace: the frame's datetime index on x, minute counts on y.
data = [
    go.Scatter(
        x=toplot.index,
        y=toplot.values,
        name='Minutes Very Active',
    ),
]

layout = go.Layout(
    title="Daily Very Active Minutes",
    yaxis=dict(title='Minutes'),
)

# iplot accepts a plain mapping with 'data' and 'layout' entries.
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='DailyVeryActiveMinutes')
Lots of variation here. Can I relate this to ultimate frisbee? Spring hat league started on April 14, 2017 and ran once a week on Fridays until May 19. Meanwhile, I started playing summer club league on May 2. We play twice a week, on Tuesdays and Thursdays, with our last game on August 17.
In [9]:
# Weekday number (pandas convention: Monday=0 ... Sunday=6) for every row of the
# full activity frame.
# NOTE(review): `dayofweek` is not referenced again in the visible cells --
# confirm whether it is still needed.
dayofweek = df_activity.index.dayofweek
In [11]:
# Boolean mask selecting rows dated on or after 2017-05-02, the day summer club
# league started. No upper bound is applied, so everything through the end of
# the data is included.
index_summerleague = df_activity.index >= '2017-05-02'
In [12]:
# Keep only the rows selected by the summer-league mask. The explicit .copy()
# makes this an independent frame, so a column can be added to it afterwards
# without pandas emitting SettingWithCopyWarning (pre-copy-on-write versions
# flag writes to frames derived by indexing another frame).
df_activity_summer = df_activity[index_summerleague].copy()
In [13]:
# Weekday number (pandas convention: Monday=0 ... Sunday=6) for each row of the
# summer-league subset.
summer_dayofweek = df_activity_summer.index.dayofweek
In [15]:
# Store the weekday number as a column so the frame can be grouped by it.
df_activity_summer['dayofweek'] = summer_dayofweek
In [16]:
# Per-weekday mean of the activity metrics (group keys: 0=Monday ... 6=Sunday).
df_activity_summer.groupby('dayofweek').mean()
Out[16]:
Monday = 0, Sunday = 6. Saturday, Sunday, and Wednesday stand out as days where I have fewer Very Active minutes, but there is no obvious evidence that Tuesday and Thursday are days where I am running around chasing plastic for 1-2 hours. I suspect that part of the challenge here is that I ride my bike to work every day. It's 15-20 minutes each way, so if Fitbit puts that time on the bike into the "Very Active" bin, it will be mixed in with the ultimate frisbee minutes. I might be able to filter out bike rides by looking at the start time of each activity. However, I will need to go back to the Fitbit API to extract that information.
In [17]:
# Per-weekday standard deviation of the same metrics, to show how much each
# weekday varies (group keys: 0=Monday ... 6=Sunday).
df_activity_summer.groupby('dayofweek').std()
Out[17]:
In [ ]: