In [33]:
%matplotlib qt4

from __future__ import division

from models import tools, filters

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's plotting defaults, selecting the "white" style, for every
# figure produced later in the notebook.
sns.set(style="white")

In [78]:
# Load the answers table through the project helper.
# limit/offset are passed as floats (1e6); presumably they are a row count and
# a starting row for load_data — TODO confirm against models.tools.
data = tools.load_data(limit=1e6, offset=1e6)


Loaded 962691 answers.

In [104]:
# Place and place-type tables. Both are converted to dicts further down and
# handed to tools.to_place_type.
places = tools.load_places()
# index_col='id' presumably makes the place-type id the frame index — TODO confirm.
place_types = tools.load_place_types(index_col='id')

In [79]:
# Keep only the answers selected by the spaced-presentation filter and report
# how many rows remain.
spaced_mask = filters.spaced_presentations(data)
spaced = data[spaced_mask]
print(len(spaced))


169861

In [80]:
# Keep only the answers selected by the massed-presentation filter and report
# how many rows remain.
massed_mask = filters.massed_presentations(data)
massed = data[massed_mask]
print(len(massed))


70541

In [81]:
# Distinct user ids present in each subset. The lists follow set iteration
# order (they are not sorted), so positions in them carry no meaning.
users_spaced, users_massed = (
    list(set(subset['user_id'])) for subset in (spaced, massed)
)

In [83]:
# Compare the day-by-day answer counts of one example user from each group.
# NOTE(review): users_massed / users_spaced are built with list(set(...)), so
# which users the indices 15 and 23 select depends on set iteration order.
massed_data = data[data['user_id'] == users_massed[15]].copy()
spaced_data = data[data['user_id'] == users_spaced[23]].copy()

# Earliest 'inserted' value per user; day 0 is counted from this moment.
first_massed = massed_data['inserted'].min()
first_spaced = spaced_data['inserted'].min()

SECONDS_PER_DAY = 3600 * 24


def _whole_days_between(start, moment):
    """Return the number of whole days elapsed from `start` to `moment`.

    Both arguments must support subtraction yielding an object with
    `total_seconds()`. The result is truncated towards zero by `int()`.
    """
    # total_seconds() is a float, so this is true division on any Python.
    return int((moment - start).total_seconds() / SECONDS_PER_DAY)


def get_days_massed(d):
    """Whole days between the massed user's first answer and `d`."""
    return _whole_days_between(first_massed, d)


def get_days_spaced(d):
    """Whole days between the spaced user's first answer and `d`."""
    return _whole_days_between(first_spaced, d)


massed_data['day'] = massed_data['inserted'].map(get_days_massed)
spaced_data['day'] = spaced_data['inserted'].map(get_days_spaced)

# Every day from 0 through the last active day, so idle days show up as empty
# bars. The "+ 1" matters: np.arange excludes its stop value, and without it
# the final day (or the only day, for a single-day user) is left off the axis.
massed_days = np.arange(0, massed_data['day'].max() + 1)
spaced_days = np.arange(0, spaced_data['day'].max() + 1)

f, (ax1, ax2) = plt.subplots(2, figsize=(10, 8))

# countplot is the axes-level counterpart of factorplot(kind="count"); it
# draws onto the given axes instead of creating a separate FacetGrid figure.
sns.countplot(x="day", data=massed_data, order=massed_days, ax=ax1)
ax1.set(title="Massed user: answers per day",
        xlabel="days since first answer", ylabel="answers")

sns.countplot(x="day", data=spaced_data, order=spaced_days, ax=ax2)
ax2.set(title="Spaced user: answers per day",
        xlabel="days since first answer", ylabel="answers")

f.tight_layout()

In [112]:
# Convert both lookup frames to nested dicts. Transposing before to_dict()
# yields {index label: {column name: value}}, i.e. one inner dict per row.
places_as_dict = places.T.to_dict()
place_types_as_dict = place_types.T.to_dict()

def to_place_type(row):
    """Resolve the place type for one answer row.

    Passes ``row['place_id']`` together with the two lookup dicts built
    above to ``tools.to_place_type`` and returns whatever that helper
    returns.
    """
    return tools.to_place_type(
        row['place_id'],
        places=places_as_dict,
        place_types=place_types_as_dict,
    )

# NOTE(review): user_data is not assigned in any of the visible cells, so this
# line raises NameError on a fresh kernel unless it is defined elsewhere —
# confirm which frame was intended.
user_data['type name'] = user_data.apply(to_place_type, axis=1)

In [ ]: