In [1]:
import pandas as pd
import numpy as np
import datetime
from bokeh.io import show, output_notebook
output_notebook()


BokehJS successfully loaded.

In [2]:
raw = pd.read_table('timelog.txt', quotechar=' ', sep=': ', names=['timestamp', 'activity'], engine='python')

# Set the column types
raw.activity = raw.activity.astype('category')
raw.timestamp = pd.to_datetime(raw.timestamp)
print(raw.dtypes)

# Add a date column
raw['date'] = raw.timestamp.dt.date
raw.head()


timestamp    datetime64[ns]
activity           category
dtype: object
Out[2]:
timestamp activity date
0 2015-06-28 06:42:00 start 2015-06-28
1 2015-06-28 10:25:00 unicorns 2015-06-28
2 2015-06-28 12:10:00 ponies 2015-06-28
3 2015-06-28 14:18:00 unicorns 2015-06-28
4 2015-06-28 16:28:00 ponies 2015-06-28

In [3]:
# Make the start and end columns

raw['end'] = raw.timestamp
raw['start'] = raw['end'].shift(1)
raw = raw.loc[raw.activity != 'start']
raw.head()


Out[3]:
timestamp activity date end start
1 2015-06-28 10:25:00 unicorns 2015-06-28 2015-06-28 10:25:00 2015-06-28 06:42:00
2 2015-06-28 12:10:00 ponies 2015-06-28 2015-06-28 12:10:00 2015-06-28 10:25:00
3 2015-06-28 14:18:00 unicorns 2015-06-28 2015-06-28 14:18:00 2015-06-28 12:10:00
4 2015-06-28 16:28:00 ponies 2015-06-28 2015-06-28 16:28:00 2015-06-28 14:18:00
5 2015-06-28 17:30:00 unicorns 2015-06-28 2015-06-28 17:30:00 2015-06-28 16:28:00

In [4]:
# Build midnight rows for activities

dates = []
stamps = []
activities = []
for date in raw.date.unique():
    for activity in raw.activity.unique():
        dates.append(date)
        stamps.append(datetime.datetime(date.year, date.month, date.day, 0, 0, 0))
        activities.append(activity)
empty_rows_df = pd.DataFrame.from_dict(
    {
    'timestamp': stamps,
    'start': stamps,
    'end': stamps,
    'date': dates,
    'activity': activities,
    }
)
empty_rows_df.head(10)


Out[4]:
activity date end start timestamp
0 unicorns 2015-06-28 2015-06-28 2015-06-28 2015-06-28
1 ponies 2015-06-28 2015-06-28 2015-06-28 2015-06-28
2 flying 2015-06-28 2015-06-28 2015-06-28 2015-06-28
3 oh the grind 2015-06-28 2015-06-28 2015-06-28 2015-06-28
4 serious business 2015-06-28 2015-06-28 2015-06-28 2015-06-28
5 unicorns 2015-06-29 2015-06-29 2015-06-29 2015-06-29
6 ponies 2015-06-29 2015-06-29 2015-06-29 2015-06-29
7 flying 2015-06-29 2015-06-29 2015-06-29 2015-06-29
8 oh the grind 2015-06-29 2015-06-29 2015-06-29 2015-06-29
9 serious business 2015-06-29 2015-06-29 2015-06-29 2015-06-29

In [5]:
df = pd.concat([raw, empty_rows_df])
# Re-order columns
df = df.reindex(columns=['date', 'activity', 'timestamp', 'start', 'end'])
# Sort columns
df = df.sort(['date', 'activity', 'start'])
df.head(20)


Out[5]:
date activity timestamp start end
2 2015-06-28 flying 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00
12 2015-06-28 flying 2015-06-28 21:32:00 2015-06-28 20:32:00 2015-06-28 21:32:00
3 2015-06-28 oh the grind 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00
1 2015-06-28 ponies 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00
2 2015-06-28 ponies 2015-06-28 12:10:00 2015-06-28 10:25:00 2015-06-28 12:10:00
4 2015-06-28 ponies 2015-06-28 16:28:00 2015-06-28 14:18:00 2015-06-28 16:28:00
6 2015-06-28 ponies 2015-06-28 17:48:00 2015-06-28 17:30:00 2015-06-28 17:48:00
7 2015-06-28 ponies 2015-06-28 18:57:00 2015-06-28 17:48:00 2015-06-28 18:57:00
8 2015-06-28 ponies 2015-06-28 19:17:00 2015-06-28 18:57:00 2015-06-28 19:17:00
9 2015-06-28 ponies 2015-06-28 19:24:00 2015-06-28 19:17:00 2015-06-28 19:24:00
11 2015-06-28 ponies 2015-06-28 20:32:00 2015-06-28 19:29:00 2015-06-28 20:32:00
4 2015-06-28 serious business 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00
0 2015-06-28 unicorns 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00
1 2015-06-28 unicorns 2015-06-28 10:25:00 2015-06-28 06:42:00 2015-06-28 10:25:00
3 2015-06-28 unicorns 2015-06-28 14:18:00 2015-06-28 12:10:00 2015-06-28 14:18:00
5 2015-06-28 unicorns 2015-06-28 17:30:00 2015-06-28 16:28:00 2015-06-28 17:30:00
10 2015-06-28 unicorns 2015-06-28 19:29:00 2015-06-28 19:24:00 2015-06-28 19:29:00
7 2015-06-29 flying 2015-06-29 00:00:00 2015-06-29 00:00:00 2015-06-29 00:00:00
8 2015-06-29 oh the grind 2015-06-29 00:00:00 2015-06-29 00:00:00 2015-06-29 00:00:00
14 2015-06-29 oh the grind 2015-06-29 12:12:00 2015-06-29 11:22:00 2015-06-29 12:12:00

In [6]:
df['delta'] = df.end - df.start
df.head()


Out[6]:
date activity timestamp start end delta
2 2015-06-28 flying 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00
12 2015-06-28 flying 2015-06-28 21:32:00 2015-06-28 20:32:00 2015-06-28 21:32:00 01:00:00
3 2015-06-28 oh the grind 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00
1 2015-06-28 ponies 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00
2 2015-06-28 ponies 2015-06-28 12:10:00 2015-06-28 10:25:00 2015-06-28 12:10:00 01:45:00

In [7]:
# Should be no 'start' rows
df[df.activity == 'start']


Out[7]:
date activity timestamp start end delta

In [8]:
df = df.sort(['date', 'activity'])
df['human_delta'] = df.delta.dt.seconds / (60*60)
df.head(15)


Out[8]:
date activity timestamp start end delta human_delta
2 2015-06-28 flying 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00 0.000000
12 2015-06-28 flying 2015-06-28 21:32:00 2015-06-28 20:32:00 2015-06-28 21:32:00 01:00:00 1.000000
3 2015-06-28 oh the grind 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00 0.000000
1 2015-06-28 ponies 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00 0.000000
2 2015-06-28 ponies 2015-06-28 12:10:00 2015-06-28 10:25:00 2015-06-28 12:10:00 01:45:00 1.750000
4 2015-06-28 ponies 2015-06-28 16:28:00 2015-06-28 14:18:00 2015-06-28 16:28:00 02:10:00 2.166667
6 2015-06-28 ponies 2015-06-28 17:48:00 2015-06-28 17:30:00 2015-06-28 17:48:00 00:18:00 0.300000
7 2015-06-28 ponies 2015-06-28 18:57:00 2015-06-28 17:48:00 2015-06-28 18:57:00 01:09:00 1.150000
8 2015-06-28 ponies 2015-06-28 19:17:00 2015-06-28 18:57:00 2015-06-28 19:17:00 00:20:00 0.333333
9 2015-06-28 ponies 2015-06-28 19:24:00 2015-06-28 19:17:00 2015-06-28 19:24:00 00:07:00 0.116667
11 2015-06-28 ponies 2015-06-28 20:32:00 2015-06-28 19:29:00 2015-06-28 20:32:00 01:03:00 1.050000
4 2015-06-28 serious business 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00 0.000000
0 2015-06-28 unicorns 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00 0.000000
1 2015-06-28 unicorns 2015-06-28 10:25:00 2015-06-28 06:42:00 2015-06-28 10:25:00 03:43:00 3.716667
3 2015-06-28 unicorns 2015-06-28 14:18:00 2015-06-28 12:10:00 2015-06-28 14:18:00 02:08:00 2.133333

In [9]:
grouped = df.groupby(['date', 'activity'])
cumulative = grouped.human_delta.cumsum()

In [11]:
df['cumulative'] = cumulative
df[df.activity=='ponies'].head(10)


Out[11]:
date activity timestamp start end delta human_delta cumulative
1 2015-06-28 ponies 2015-06-28 00:00:00 2015-06-28 00:00:00 2015-06-28 00:00:00 00:00:00 0.000000 0.000000
2 2015-06-28 ponies 2015-06-28 12:10:00 2015-06-28 10:25:00 2015-06-28 12:10:00 01:45:00 1.750000 1.750000
4 2015-06-28 ponies 2015-06-28 16:28:00 2015-06-28 14:18:00 2015-06-28 16:28:00 02:10:00 2.166667 3.916667
6 2015-06-28 ponies 2015-06-28 17:48:00 2015-06-28 17:30:00 2015-06-28 17:48:00 00:18:00 0.300000 4.216667
7 2015-06-28 ponies 2015-06-28 18:57:00 2015-06-28 17:48:00 2015-06-28 18:57:00 01:09:00 1.150000 5.366667
8 2015-06-28 ponies 2015-06-28 19:17:00 2015-06-28 18:57:00 2015-06-28 19:17:00 00:20:00 0.333333 5.700000
9 2015-06-28 ponies 2015-06-28 19:24:00 2015-06-28 19:17:00 2015-06-28 19:24:00 00:07:00 0.116667 5.816667
11 2015-06-28 ponies 2015-06-28 20:32:00 2015-06-28 19:29:00 2015-06-28 20:32:00 01:03:00 1.050000 6.866667
6 2015-06-29 ponies 2015-06-29 00:00:00 2015-06-29 00:00:00 2015-06-29 00:00:00 00:00:00 0.000000 0.000000
23 2015-06-29 ponies 2015-06-29 17:05:00 2015-06-29 16:55:00 2015-06-29 17:05:00 00:10:00 0.166667 0.166667

In [12]:
to_plot = df[['timestamp', 'activity', 'cumulative']]
to_plot = to_plot.pivot('timestamp', 'activity').ffill().fillna(0)
to_plot.head(15)


Out[12]:
cumulative
activity flying oh the grind ponies serious business unicorns
timestamp
2015-06-28 00:00:00 0 0.000000 0.000000 0 0.000000
2015-06-28 10:25:00 0 0.000000 0.000000 0 3.716667
2015-06-28 12:10:00 0 0.000000 1.750000 0 3.716667
2015-06-28 14:18:00 0 0.000000 1.750000 0 5.850000
2015-06-28 16:28:00 0 0.000000 3.916667 0 5.850000
2015-06-28 17:30:00 0 0.000000 3.916667 0 6.883333
2015-06-28 17:48:00 0 0.000000 4.216667 0 6.883333
2015-06-28 18:57:00 0 0.000000 5.366667 0 6.883333
2015-06-28 19:17:00 0 0.000000 5.700000 0 6.883333
2015-06-28 19:24:00 0 0.000000 5.816667 0 6.883333
2015-06-28 19:29:00 0 0.000000 5.816667 0 6.966667
2015-06-28 20:32:00 0 0.000000 6.866667 0 6.966667
2015-06-28 21:32:00 1 0.000000 6.866667 0 6.966667
2015-06-29 00:00:00 0 0.000000 0.000000 0 0.000000
2015-06-29 12:12:00 0 0.833333 0.000000 0 0.000000

In [15]:
from bokeh.charts import TimeSeries
show(TimeSeries(to_plot))