In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable the autoreload extension in mode 2, so that imported modules are
# reloaded before each cell is executed (edits to local modules are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2
In [39]:
from collections import OrderedDict
from IPython.display import display, Markdown
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import learn
from monitoring.data import get_production_data
from monitoring.visualization import display_level_overview
# Apply seaborn's default styling to subsequent matplotlib figures.
sns.set()
# Show floats in displayed DataFrames with two decimal places.
pd.options.display.float_format = '{:.2f}'.format
In [3]:
# Load data from local cache, fetch and store if not available.
# `data` is indexed by table name below ('task_sessions', 'tasks').
# NOTE(review): '2018-06-21' presumably selects the date of the data export --
# confirm against monitoring.data.get_production_data.
data = get_production_data('2018-06-21')
In [4]:
# Task sessions with a positive time spent, plus a `date` column holding the
# first 10 characters of the `end` string.
# NOTE(review): assumes `end` is an ISO-like 'YYYY-MM-DD...' timestamp string.
ts = (
    data['task_sessions']
    .loc[lambda sessions: sessions['time_spent'] > 0]
    .assign(date=lambda sessions: sessions['end'].str[:10])
)
In [5]:
# Aggregate the task sessions per value of the `date` column.
grouped_ts = ts.groupby('date')
metrics = pd.DataFrame(OrderedDict([
    # Number of distinct students with a session on that date.
    ('active_students', grouped_ts['student'].nunique()),
    # Total time spent, divided by 3600 (seconds -> hours).
    ('solving_hours', grouped_ts['time_spent'].sum() / 3600),
    ('solved_count', grouped_ts['solved'].sum()),
    ('success_rate', grouped_ts['solved'].mean()),
]))
metrics = metrics.sort_index()
# Turn the date strings of the index into timestamps so the frame can be
# resampled by time period.
metrics.index = pd.to_datetime(metrics.index)
In [6]:
# Weekly mean of the daily number of distinct active students.
# Dates without any session are absent from `metrics`, so they do not enter
# the mean. The trailing `;` keeps the Axes repr out of the cell output.
metrics['active_students'].resample('1W').mean().plot(
    title='active students per day (weekly mean)');
Out[6]:
In [7]:
# Weekly mean of the daily total solving time in hours.
# The trailing `;` keeps the Axes repr out of the cell output.
metrics['solving_hours'].resample('1W').mean().plot(
    title='solving hours per day (weekly mean)');
Out[7]:
In [8]:
# Weekly mean of the daily sum of the `solved` flag over task sessions.
# The trailing `;` keeps the Axes repr out of the cell output.
metrics['solved_count'].resample('1W').mean().plot(
    title='solved sessions per day (weekly mean)');
Out[8]:
In [9]:
# Weekly mean of the daily success rate; the y-axis is pinned to [0, 1] so
# the rate is always shown on its full scale.
# The trailing `;` keeps the Axes repr out of the cell output.
metrics['success_rate'].resample('1W').mean().plot(
    ylim=[0, 1],
    title='daily success rate (weekly mean)');
Out[9]:
In [10]:
# Histogram of the natural logarithm of the time spent per task session.
# `ts` only holds sessions with time_spent > 0, so the logarithm is defined.
# np.log is applied to the whole Series directly, and the plot gets a title
# and axis label; the trailing `;` keeps the repr out of the cell output.
np.log(ts['time_spent']).hist().set(
    title='time spent per task session',
    xlabel='log(time_spent)');
Out[10]:
In [6]:
# Per-task aggregates over all task sessions.
# The intermediate names are deliberately distinct from the per-day
# `grouped_ts` / `metrics`: reusing those names here replaced the date-indexed
# frame, so the weekly plotting cells could no longer be re-run afterwards.
ts_by_task = ts.groupby('task')
task_metrics = pd.DataFrame(OrderedDict([
    ('time', ts_by_task['time_spent'].median()),
    ('success', ts_by_task['solved'].mean()),
    # Number of sessions per task (one row of `ts` per attempt).
    ('n_attempts', ts_by_task.size()),
    ('n_solved', ts_by_task['solved'].sum()),
]))
task_metrics['n_unsolved'] = (
    task_metrics['n_attempts'] - task_metrics['n_solved'])
# Attach the aggregates to the task table; the join aligns on the index of
# data['tasks'] (presumably the task identifier used in `ts.task` -- confirm).
# NOTE(review): fillna(0) zeroes every missing value in the joined frame, so
# tasks without any session get time == 0 and success == 0 (not only zero
# counts); these rows then take part in the histograms and scatter plots.
tasks = data['tasks'].join(task_metrics).fillna(0)
In [7]:
# Stacked bar chart of solved vs. unsolved attempts, one bar per task, with
# the most attempted tasks first.
ordered_tasks = tasks.sort_values(by='n_attempts', ascending=False)
ax = ordered_tasks.loc[:, ['n_solved', 'n_unsolved']].plot(
    kind='bar',
    stacked=True,
    figsize=(9, 6),
    title='Attempts',
    color=[sns.xkcd_rgb['medium green'], sns.xkcd_rgb['pale red']],
)
# Hide the per-bar tick labels and label the axis as a whole instead.
ax.set_xticklabels([])
ax.set_xlabel('task');
In [13]:
# Distribution of the per-task success rate.
ax = tasks['success'].plot(kind='hist', title='success rate')
In [14]:
# Distribution of the per-task median solving time.
ax = tasks['time'].plot(kind='hist', title='median time')
In [15]:
# Per-task median time against success rate, with a logarithmic time axis.
tasks.plot(kind='scatter', x='success', y='time', logy=True)
Out[15]:
In [55]:
# One row per mission, aggregated from the per-task table.
grouped_tasks = tasks.groupby('mission')
missions = (
    pd.DataFrame(OrderedDict([
        ('level', grouped_tasks['level'].min()),
        ('time', grouped_tasks['time'].median()),  # median of medians
        # Unweighted mean of the per-task success rates.
        ('success', grouped_tasks['success'].mean()),
        ('n_attempts', grouped_tasks['n_attempts'].sum()),
        ('n_solved', grouped_tasks['n_solved'].sum()),
        ('n_tasks', grouped_tasks['name'].count()),
    ]))
    # Move the mission key from the index into a regular column called `name`.
    .reset_index()
    .rename(columns={'mission': 'name'})
    .sort_values(by='level')
    # Keep only the columns shown in the overview, in display order.
    .loc[:, ['level', 'name', 'n_tasks', 'n_attempts', 'success', 'time']]
)
display_level_overview(missions, order_by='level')
In [57]:
def display_plot(ax):
    """Render the figure that owns `ax` at this point of the cell output.

    Lets plots created inside a loop appear in order between other displayed
    items instead of all at the end of the cell.

    Args:
        ax: matplotlib Axes whose figure should be shown.
    """
    fig = ax.figure
    # Display the figure itself: displaying the Axes object only emits its
    # text repr, not the plot.
    display(fig)
    # Close the figure so it is not rendered a second time when the cell
    # finishes, and so figures do not accumulate across loop iterations.
    plt.close(fig)
def display_success_rate_hist(tasks):
    """Show a histogram of the `success` column of `tasks`.

    The histogram uses ten equal-width bins over [0, 1]. Both axes are fixed
    (x to [0, 1], y to [0, 10]) so that plots for different task subsets share
    the same scale.

    Args:
        tasks: DataFrame with a `success` column.
    """
    bin_edges = np.linspace(0, 1, 11)
    ax = tasks['success'].plot(
        kind='hist', bins=bin_edges, title='success rate')
    ax.set(xlim=(0, 1), ylim=(0, 10))
    display_plot(ax)
def display_time_vs_success(tasks):
    """Show a scatter plot of `time` against `success` for `tasks`.

    The x-axis is fixed to [0, 1]; the y-axis is left to autoscale.

    Args:
        tasks: DataFrame with `success` and `time` columns.
    """
    ax = tasks.plot(kind='scatter', x='success', y='time')
    ax.set(xlim=(0, 1))
    display_plot(ax)
def analyze_level(name):
    """Display the report section for the mission called `name`.

    The section consists of a Markdown heading, the overview table of the
    mission's tasks, the success-rate histogram, the time-vs-success scatter
    plot and a closing horizontal rule. Reads the notebook-level `tasks` frame.

    Args:
        name: value of the `mission` column selecting the tasks to report.
    """
    overview_columns = [
        'name', 'level2', 'order', 'n_attempts', 'success', 'time']
    mission_tasks = tasks.loc[tasks['mission'] == name, overview_columns]
    display(Markdown('# {}'.format(name)))
    # reset_index() exposes the index of the task table as a regular column
    # for the overview.
    display_level_overview(
        mission_tasks.reset_index(), order_by=['level2', 'order'])
    display_success_rate_hist(mission_tasks)
    display_time_vs_success(mission_tasks)
    display(Markdown('-----'))
# Emit one report section per mission, in the row order of `missions`.
for name in missions['name']:
    analyze_level(name)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: