In [1]:
import pandas as pd
In [2]:
# Template for the exported CSV files; `{entity}` is replaced by the entity name.
path = '../.server/media/exports/robomission-2017-11-17/{entity}.csv'


def load_entity(name):
    """Load one exported entity table as a DataFrame indexed by its ``id`` column.

    Parameters
    ----------
    name : str
        Entity name substituted for ``{entity}`` in the module-level ``path``
        template (e.g. ``'tasks'``).

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV file, with the ``id`` column used as the index.
    """
    return pd.read_csv(path.format(entity=name), index_col='id')
In [4]:
# Preview the first five rows of the tasks table.
load_entity('tasks').head(n=5)
Out[4]:
In [4]:
# Display the whole levels table (no row limit is applied here).
load_entity('levels')
Out[4]:
In [5]:
# Display the whole toolboxes table (no row limit is applied here).
load_entity('toolboxes')
Out[5]:
In [6]:
# Preview the first five rows of the students table.
load_entity('students').head(n=5)
Out[6]:
In [7]:
# Preview the first five rows of the task sessions table.
load_entity('task_sessions').head(n=5)
Out[7]:
In [8]:
# Load all program snapshots and preview the first 14 rows.
snapshots = load_entity('program_snapshots')
snapshots.head(n=14)
Out[8]:
{'edit', 'execution'}

- The `correct` field is only set for execution snapshots.
- `order` and `time_delta` are computed per granularity.
- `time_delta` = number of seconds since the previous snapshot of the same granularity.
- `time_from_start` = number of seconds since the start of the task session.

The actions time series describes all actions we model. The state of all other entities is given by the initial state (static data, such as tasks and levels) and the actions. For most analyses, it's easier to use some derived data (such as task sessions) instead of raw actions.
Actions are semi-structured: there are some common fields (e.g. `name`, `time`, `student`, `task`)
and an unstructured, action-specific dictionary in the `data` column.
Note: Although `task` was decided to be a common field, not all actions have it
(namely, watch-instruction actions don't).
As a result, there are some NaNs in this column, and pandas uses floats for it
(a NumPy integer array cannot hold NaNs, so integers and NaNs cannot share the same array).
In [9]:
# Load the raw action log and show ten rows (positions 20 through 29).
actions = load_entity('actions')
actions.iloc[20:30]
Out[9]:
In [10]:
# Distinct action names in the log; currently there are only 4 types of action.
set(actions['name'])
Out[10]: