In [33]:
%matplotlib qt4
from __future__ import division
from models import tools, filters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "white" style globally so every figure drawn below uses it.
sns.set(style="white")
In [78]:
# Load the working data set through the project's own loader.
# NOTE(review): presumably `limit`/`offset` select 1e6 records after skipping
# the first 1e6 -- confirm against tools.load_data. Both are passed as floats.
data = tools.load_data(limit=1e6, offset=1e6)
In [104]:
# Lookup tables consumed by the place-type cell further down, where they are
# converted to dicts and handed to tools.to_place_type.
places = tools.load_places()
# The place-type table is loaded with index_col='id'.
place_types = tools.load_place_types(index_col='id')
In [79]:
# Keep only the rows of `data` selected by filters.spaced_presentations and
# report how many there are.
spaced = data[filters.spaced_presentations(data)]
# Parenthesised form prints the same value on Python 2 and Python 3.
print(len(spaced))
In [80]:
# Keep only the rows of `data` selected by filters.massed_presentations and
# report how many there are.
massed = data[filters.massed_presentations(data)]
# Parenthesised form prints the same value on Python 2 and Python 3.
print(len(massed))
In [81]:
# Distinct user ids appearing in each subset.
# NOTE(review): the lists are built from sets, so their element order is an
# implementation detail of set iteration. The plotting cell picks users by
# position (users_massed[15], users_spaced[23]); any change to how these lists
# are built may silently select different users.
users_spaced = list(set(spaced['user_id']))
users_massed = list(set(massed['user_id']))
In [83]:
def _days_since(start):
    """Return a function mapping a timestamp to whole days elapsed since `start`."""
    def to_day(d):
        # int() truncates, so everything within the first 24 hours is day 0.
        return int((d - start).total_seconds() / (3600 * 24))
    return to_day


# One example user from each group, copied so the new 'day' column is not
# written into a slice of `data`.
# NOTE(review): positions 15 and 23 index lists built from sets, so which
# users they select depends on set iteration order.
massed_data = data[data['user_id'] == users_massed[15]].copy()
spaced_data = data[data['user_id'] == users_spaced[23]].copy()

# Earliest 'inserted' value per user; day numbers are counted from it.
first_massed = min(massed_data['inserted'])
first_spaced = min(spaced_data['inserted'])

# Same names as before, now produced by one shared helper instead of two
# copy-pasted functions.
get_days_massed = _days_since(first_massed)
get_days_spaced = _days_since(first_spaced)

massed_data['day'] = massed_data['inserted'].map(get_days_massed)
spaced_data['day'] = spaced_data['inserted'].map(get_days_spaced)

# Every day from 0 through the last active day, so days without any rows
# still get an (empty) slot on the x axis. The +1 matters: np.arange excludes
# its stop value, and without it the final day was dropped from the plot
# (and a user active on a single day produced an empty axis order).
massed_days = np.arange(0, max(massed_data['day']) + 1)
spaced_days = np.arange(0, max(spaced_data['day']) + 1)

f, (ax1, ax2) = plt.subplots(2, figsize=(10, 8))
# countplot is the axes-level function behind factorplot(kind="count"); it
# draws directly on the given Axes instead of creating a separate FacetGrid
# figure for each call.
sns.countplot(x="day", data=massed_data, order=massed_days, ax=ax1)
sns.countplot(x="day", data=spaced_data, order=spaced_days, ax=ax2)
# Label the panels so the figure can be read on its own.
ax1.set_title("massed")
ax2.set_title("spaced")
f.tight_layout()
In [112]:
# Plain-dict copies of the two lookup tables. Assuming both are DataFrames,
# transposing before to_dict() keys the outer dict by each table's index,
# with one inner dict per original row.
places_as_dict = places.T.to_dict()
place_types_as_dict = place_types.T.to_dict()


def to_place_type(row):
    """Resolve one row's place type by delegating to ``tools.to_place_type``.

    Only ``row['place_id']`` is read from the row; the two lookup dicts built
    above are forwarded as the ``places`` and ``place_types`` keyword
    arguments. The return value is whatever ``tools.to_place_type`` returns.
    """
    lookup_tables = dict(
        places=places_as_dict,
        place_types=place_types_as_dict,
    )
    return tools.to_place_type(row['place_id'], **lookup_tables)


# NOTE(review): `user_data` is not assigned in any cell visible in this
# notebook; on a fresh kernel this line raises NameError unless it is defined
# elsewhere -- confirm which frame was intended.
user_data['type name'] = user_data.apply(to_place_type, axis=1)
In [ ]: