In [172]:
%matplotlib inline
import math
from collections import defaultdict

import numpy as np
import pylab
from matplotlib import pyplot
from pandas.io.parsers import read_csv
from scipy import stats
# Default size for every figure drawn in this notebook.
pyplot.rcParams['figure.figsize'] = (8.0, 6.0)

# Load the users table and display it as the cell output.
users_csv_path = 'mxit_app_data/users.csv'
users = read_csv(users_csv_path)
users
Out[172]:
In [13]:
# Load the quiz table (one row per quiz attempt, read below via its
# 'user_id' and 'end_time' columns) and display it as the cell output.
quiz_csv_path = 'mxit_app_data/quiz.csv'
quiz = read_csv(quiz_csv_path)
quiz
Out[13]:
Scatter plot of scores over time
In [14]:
# Count, per user, how many quiz rows have a recorded end_time.
user_count = defaultdict(int)

# read_csv loads empty cells as NaN, and NaN is truthy, so a plain
# `if end_time:` test would also count quizzes that never finished.
# notnull() excludes those rows explicitly.
finished_quizzes = quiz[quiz['end_time'].notnull()]
for finished_user_id in finished_quizzes['user_id']:
    user_count[finished_user_id] += 1
In [136]:
# Rank users by their number of finished quizzes, most active first.
# Each entry is (quiz_count, user_id). items() and print() run on both
# Python 2 and Python 3, unlike iteritems() and the print statement.
counts = sorted(
    ((quiz_count, uid) for uid, quiz_count in user_count.items()),
    key=lambda pair: pair[0],
    reverse=True,
)
print(counts[:20])
In [72]:
# Load every recorded score and index it by user:
# all_user_scores[user_id] -> list of (score, quiz_type, difficulty) tuples,
# in file order.
scores_data = read_csv('mxit_app_data/scores.csv')
all_user_scores = defaultdict(list)

# Walk the four columns in lock-step instead of using iterrows(), which
# builds a throw-away Series for every row.
score_columns = zip(
    scores_data['user_id'],
    scores_data['score'],
    scores_data['quiz_type'],
    scores_data['difficulty'],
)
for row_user_id, row_score, row_quiz_type, row_difficulty in score_columns:
    all_user_scores[row_user_id].append((row_score, row_quiz_type, row_difficulty))
def user_scores(user_id, scores=None):
    """Return the (score, quiz_type, difficulty) tuples recorded for one user.

    Parameters
    ----------
    user_id
        Value matched against the 'user_id' column.
    scores : DataFrame, optional
        Table with 'user_id', 'score', 'quiz_type' and 'difficulty' columns.
        Defaults to the notebook-level ``scores_data`` frame.

    Returns
    -------
    list of tuple
        One ``(score, quiz_type, difficulty)`` tuple per matching row, in
        table order; an empty list when the user has no rows.
    """
    if scores is None:
        scores = scores_data
    # A boolean mask selects the user's rows in one pass, replacing a Python
    # level iterrows() scan of the whole table on every call.
    matching = scores[scores['user_id'] == user_id]
    return list(zip(matching['score'], matching['quiz_type'], matching['difficulty']))
In [186]:
# For each of the 200 most active users and each difficulty level 0-2,
# fit a straight line to the user's 'all'-type scores in play order and
# save a scatter plot with the fitted line to '<user_id>-<difficulty>.png'.
# Series with fewer than 20 scores, or with a correlation coefficient
# above 0.2, are skipped.
for user_id in (c[1] for c in counts[:200]):
    for d in range(3):
        # .get() avoids inserting empty entries into the defaultdict for
        # users that have no rows in the scores table.
        ys = [s[0] for s in all_user_scores.get(user_id, [])
              if not math.isnan(s[0]) and s[1] == 'all' and s[2] == d]
        if len(ys) < 20:
            continue

        # np.arange / np.array come from the top-level numpy import, so this
        # cell no longer relies on names imported by a later cell.
        r = np.arange(len(ys))
        y = np.array(ys)
        slope, intercept, r_value, p_value, std_err = stats.linregress(r, y)
        if r_value > 0.2:
            continue

        # scipy.stats.nanstd is no longer available in SciPy; ddof=1 keeps
        # the unbiased (N-1) estimate it returned by default.
        std_dev = np.nanstd(y, ddof=1)
        print(r_value)

        # Always start a fresh figure. The previous numbering
        # (figure*2+d-1) gave the same number to different
        # (user, difficulty) pairs, so their plots were drawn on top of
        # each other.
        fig = pyplot.figure()
        ax = fig.add_axes([0, 0, 1, 1])
        ax.text(0.65, 0.15, 'r=%.3f' % r_value, transform=ax.transAxes, fontsize=16)
        ax.text(0.65, 0.1, 'user id: %d' % user_id, transform=ax.transAxes, fontsize=16)
        ax.text(0.65, 0.2, 'std deviation: %.2f' % std_dev, transform=ax.transAxes, fontsize=16)
        ax.set_title('Score over time')
        ax.set_ylabel('Score')
        ax.set_xlabel('Game index')
        ax.plot(r, slope * r + intercept, color='r', linewidth=2.0)
        ax.scatter(r, y)
        fig.savefig('%d-%d.png' % (user_id, d), bbox_inches='tight')
In [101]:
from numpy import arange,array,ones
from pylab import plot,show
from scipy import stats
# Stand-alone linear regression of `ys` against its index.
# NOTE: `ys` is not defined in this cell; it is whatever list the score
# plotting loop assigned last, so that loop must have been run first.
xi = arange(len(ys))
y = array(ys)
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, y)
# Formatted print() yields the same space-separated line on Python 2 and 3;
# the multi-argument print statement is a SyntaxError on Python 3.
print('%s %s %s %s %s' % (slope, intercept, r_value, p_value, std_err))
In [ ]: