Initialize Settings


In [ ]:
import json
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import statsmodels.api as sm

plt.style.use('ggplot')
plt.rcParams['text.color'] = 'black'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 10
plt.rcParams['axes.labelsize'] = 10
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['xtick.labelsize'] = 8
plt.rcParams['ytick.labelsize'] = 8
plt.rcParams['xtick.color'] = 'gray'
plt.rcParams['ytick.color'] = 'gray'
plt.rcParams['axes.labelcolor'] = 'gray'

plt.rcParams['legend.fontsize'] = 10
plt.rcParams['figure.titlesize'] = 12
% matplotlib inline

Load Data with Initial Formatting


In [ ]:
# Parse the Wunderlist JSON export; the task records live under data -> tasks.
with open('wunderlist-20160923-13-58-43.json') as f:
    wunderlist_json = json.load(f)

tasks = pd.DataFrame(wunderlist_json['data']['tasks'])
# Turn the completion timestamps into datetimes so they can be filtered and
# used as a time index later on.
tasks['completed_at'] = pd.to_datetime(tasks['completed_at'])

In [ ]:
# Read the daily-rating sheet and index it by date.
# The first CSV column has no header, so pandas labels it 'Unnamed: 0';
# it is renamed to 'date', parsed as datetimes and promoted to the index.
with open('Daily Rating - Data.csv') as f:
    daily_rating = (
        pd.read_csv(f)
        .rename(columns={'Unnamed: 0': 'date'})
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .set_index('date')
    )

In [ ]:
# Read the semicolon-delimited sleep log and normalise its columns.
with open('sleepdata.csv') as f:
    sleep_data = pd.read_csv(f, delimiter=';')

# 'Sleep Notes' is a colon-separated string; split it into a list per row.
sleep_data['Sleep Notes'] = sleep_data['Sleep Notes'].str.split(':')

# Index each record by the calendar date of its 'End' timestamp (time of day
# dropped) so it can be joined with the other date-indexed frames.
sleep_data['End'] = pd.to_datetime(pd.to_datetime(sleep_data['End']).dt.date)
sleep_data = sleep_data.set_index('End')

# Strip the '%' sign and store sleep quality as an integer.
sleep_data['Sleep quality'] = sleep_data['Sleep quality'].str.replace('%', '').astype('int')

# Identifier-friendly alias. It must be created with [] assignment:
# `sleep_data.Sleep_quality = ...` would only set a plain Python attribute
# on this one object (not a column), which is lost on copy/slice/merge.
sleep_data['Sleep_quality'] = sleep_data['Sleep quality']

Data Analysis / Visualization


In [ ]:
# Tasks Completed By Week
# Keep only tasks that have a completion timestamp.
task_group = pd.DataFrame(tasks[tasks.completed_at.notnull()])

# Sum per calendar day, then per week. pd.Grouper(freq=...) replaces
# pd.TimeGrouper, which has been removed from pandas.
# NOTE(review): .sum() aggregates every column of the task frame; if recent
# pandas complains about non-numeric columns, numeric_only=True may be
# needed here -- confirm against the actual export.
tasks_by_date = task_group.set_index('completed_at').groupby(pd.Grouper(freq='D')).sum()
weekly_completed = tasks_by_date.groupby(pd.Grouper(freq='W')).sum().completed

fig, ax = plt.subplots()
ax.plot(weekly_completed)
fig.autofmt_xdate()  # slant the date tick labels
ax.set_title("Wunderlist Tasks Completed by Week")
ax.set_ylabel("Tasks Completed")
fig.tight_layout()

In [ ]:
# Average Daily Rating by Week
# Drop days without a rating; later cells rely on this filtered frame.
daily_rating = pd.DataFrame(daily_rating[daily_rating.Rating.notnull()])

# pd.Grouper(freq='W') replaces pd.TimeGrouper('w'), which has been removed
# from pandas.
weekly_rating = daily_rating.groupby(pd.Grouper(freq='W')).Rating.mean()

fig, ax = plt.subplots()
weekly_rating.plot(ax=ax)
ax.set_ylim(3, 8)
ax.set_title("Average Daily Rating per Week")

In [ ]:
# Daily Rating vs. Tasks Completed
# Inner join on the date index: only days present in both frames survive.
merged = pd.merge(daily_rating, tasks_by_date, left_index=True, right_index=True)

# Number of days sharing each (completed, Rating) pair.
sizes_counts = merged.groupby(['completed', 'Rating']).size().reset_index(name='count')

# Look up each day's pair count with a single left merge (row order of
# `merged` is preserved) instead of rescanning the count table for every row.
sizes = (
    merged[['completed', 'Rating']]
    .merge(sizes_counts, on=['completed', 'Rating'], how='left')['count']
    .values
)

# Marker area grows with the number of days that fall on the same point.
fig, ax = plt.subplots()
ax.scatter(merged.completed, merged.Rating, s=sizes * 15, c='blue')
ax.set_ylabel("Daily Rating")
ax.set_xlabel("Tasks Completed")

In [ ]:
# Hours Slept over time
fig, ax = plt.subplots()

# 'Time in bed' is appended with ":00" to form an H:MM:SS string, parsed as a
# timedelta and stored as minutes. total_seconds() / 60 is used because
# astype('timedelta64[m]') is rejected by recent pandas, and the result is
# stored with [] so it becomes a real column rather than an ad-hoc attribute.
sleep_data['Time_in_bed'] = pd.to_timedelta(sleep_data['Time in bed'] + ":00").dt.total_seconds() / 60
minutes_in_bed = sleep_data['Time_in_bed']

# Ignore records of an hour or less, then average per week.
# pd.Grouper(freq='W') replaces the removed pd.TimeGrouper('w').
minutes_in_bed[minutes_in_bed > 60].groupby(pd.Grouper(freq='W')).mean().plot(ax=ax)

# Dashed red reference line at 480 minutes (8 hours).
ax.axhline(y=480, c='r', linestyle='dashed')

# The axis is in minutes: show 5.8 h to 9 h with a tick every 60 minutes,
# and label each tick in whole hours.
ax.set_ylim(5.8 * 60, 9 * 60)
ax.set_yticks(np.arange(6 * 60, 10 * 60, 60))
ax.set_ylabel("Hours Slept")
ax.set_xlabel("")
ax.yaxis.set_major_formatter(tkr.FuncFormatter(lambda x, y: int(x/60)))
fig.tight_layout()

In [ ]:
# Sleep Quality over time
fig, ax = plt.subplots()

# Read the real 'Sleep quality' column rather than an attribute alias, so
# this cell does not depend on how an earlier cell stored that alias.
quality = sleep_data['Sleep quality']

# Leave out records with a quality of 0, then average per week.
# pd.Grouper(freq='W') replaces the removed pd.TimeGrouper('w').
quality[quality > 0].groupby(pd.Grouper(freq='W')).mean().plot(ax=ax)

ax.set_ylabel("Sleep Quality (%)")
ax.set_xlabel("")
ax.set_title("Sleep Quality Over Time")
fig.tight_layout()

In [ ]:
# Sleep Quality vs Daily Rating
# Inner join on the date index: only days that have sleep data, a rating and
# task data are kept.
all_data = pd.merge(sleep_data, merged, left_index=True, right_index=True)
sleep_quality = all_data['Sleep quality']

# Number of days sharing each (Sleep quality, Rating) pair.
sleep_frequencies = all_data.groupby(['Sleep quality', 'Rating']).size().reset_index(name='count')

# Look up each day's pair count with a single left merge (row order of
# `all_data` is preserved) instead of rescanning the count table per row.
sleep_sizes = (
    all_data[['Sleep quality', 'Rating']]
    .merge(sleep_frequencies, on=['Sleep quality', 'Rating'], how='left')['count']
    .values
)

# Degree-1 least-squares fit of rating against sleep quality.
trend = np.poly1d(np.polyfit(sleep_quality, all_data.Rating, 1))

# Marker area grows with the number of days on the same point; the fitted
# line is drawn over the scatter.
fig, ax = plt.subplots()
ax.scatter(sleep_quality, all_data.Rating, s=sleep_sizes * 10, c='blue')
ax.plot(sleep_quality, trend(sleep_quality))
ax.set_ylabel("Daily Rating")
ax.set_xlabel("Sleep Quality (%)")