In [1]:
import pandas as pd
import numpy as np
import datetime
from bokeh.io import show, output_notebook
output_notebook()
In [2]:
raw = pd.read_table('timelog.txt', quotechar=' ', sep=': ', names=['timestamp', 'activity'], engine='python')
# Set the column types
raw.activity = raw.activity.astype('category')
raw.timestamp = pd.to_datetime(raw.timestamp)
print(raw.dtypes)
# Add a date column
raw['date'] = raw.timestamp.dt.date
raw.head()
Out[2]:
In [3]:
# Make the start and end columns
raw['end'] = raw.timestamp
raw['start'] = raw['end'].shift(1)
raw = raw.loc[raw.activity != 'start']
raw.head()
Out[3]:
In [4]:
# Build midnight rows for activities
dates = []
stamps = []
activities = []
for date in raw.date.unique():
for activity in raw.activity.unique():
dates.append(date)
stamps.append(datetime.datetime(date.year, date.month, date.day, 0, 0, 0))
activities.append(activity)
empty_rows_df = pd.DataFrame.from_dict(
{
'timestamp': stamps,
'start': stamps,
'end': stamps,
'date': dates,
'activity': activities,
}
)
empty_rows_df.head(10)
Out[4]:
In [5]:
df = pd.concat([raw, empty_rows_df])
# Re-order columns
df = df.reindex(columns=['date', 'activity', 'timestamp', 'start', 'end'])
# Sort columns
df = df.sort(['date', 'activity', 'start'])
df.head(20)
Out[5]:
In [6]:
df['delta'] = df.end - df.start
df.head()
Out[6]:
In [7]:
# Should be no 'start' rows
df[df.activity == 'start']
Out[7]:
In [8]:
df = df.sort(['date', 'activity'])
df['human_delta'] = df.delta.dt.seconds / (60*60)
df.head(15)
Out[8]:
In [9]:
grouped = df.groupby(['date', 'activity'])
cumulative = grouped.human_delta.cumsum()
In [11]:
df['cumulative'] = cumulative
df[df.activity=='ponies'].head(10)
Out[11]:
In [12]:
to_plot = df[['timestamp', 'activity', 'cumulative']]
to_plot = to_plot.pivot('timestamp', 'activity').ffill().fillna(0)
to_plot.head(15)
Out[12]:
In [15]:
from bokeh.charts import TimeSeries
show(TimeSeries(to_plot))