In [1]:
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from ggplot import *
import rating_utils
%matplotlib inline
In [2]:
data = pd.read_csv('data/12-13.csv')
indices = np.random.rand(len(data)) < 0.7
train = data[indices].reset_index()
test = data[~indices].reset_index()
print rating_utils.get_teams(data)
print len(train), len(test)
Let's plot the distribution of the score difference (home points minus visitor points) over all matches.
In [3]:
# Histogram of the per-match score difference (HomePTS minus VisitorPTS),
# using bins that are 2 points wide. The parenthesised expression is the
# cell's last value, so the notebook renders the resulting plot.
(
    ggplot(
        pd.DataFrame({'score_difference': data['HomePTS'] - data['VisitorPTS']}),
        aes(x='score_difference')
    )
    + geom_histogram(fill='#33bbff', binwidth=2)
    + ylab('match count')
)
Out[3]:
In [4]:
random_model = rating_utils.RandomRating()
random_model.fit(train, rating_utils.get_teams(data))
print random_model.ratings
plt.hist(random_model.ratings)
pass
We now compute the prediction accuracy and cross-entropy loss of the random rating model on the training and test sets.
In [5]:
print "Training and testing accuracy:"
print random_model.test_accuracy(train), random_model.test_accuracy(test)
print "\nTraining and testing cross entropy loss:"
print random_model.test_cross_entropy_loss(train), random_model.test_cross_entropy_loss(test)
In [6]:
massey_model = rating_utils.MasseyMethod()
massey_model.fit(train, rating_utils.get_teams(data))
print massey_model.ratings
plt.hist(massey_model.ratings)
pass
We now compute the prediction accuracy and cross-entropy loss of the Massey model on the training and test sets.
In [7]:
print "Training and testing accuracy:"
print massey_model.test_accuracy(train), massey_model.test_accuracy(test)
print "\nTraining and testing cross entropy loss:"
print massey_model.test_cross_entropy_loss(train), massey_model.test_cross_entropy_loss(test)
In [8]:
markov_match_model = rating_utils.MarkovMatchMethod()
markov_match_model.fit(train, rating_utils.get_teams(data))
print markov_match_model.ratings
plt.hist(markov_match_model.ratings)
pass
We now compute the prediction accuracy and cross-entropy loss of the Markov match model on the training and test sets.
In [9]:
print "Training and testing accuracy:"
print markov_match_model.test_accuracy(train), markov_match_model.test_accuracy(test)
print "\nTraining and testing cross entropy loss:"
print markov_match_model.test_cross_entropy_loss(train), markov_match_model.test_cross_entropy_loss(test)
In [10]:
markov_score_model = rating_utils.MarkovScoreMethod()
markov_score_model.fit(train, rating_utils.get_teams(data))
print markov_score_model.ratings
plt.hist(markov_score_model.ratings)
pass
We now compute the prediction accuracy and cross-entropy loss of the Markov score model on the training and test sets.
In [11]:
print "Training and testing accuracy:"
print markov_score_model.test_accuracy(train), markov_score_model.test_accuracy(test)
print "\nTraining and testing cross entropy loss:"
print markov_score_model.test_cross_entropy_loss(train), markov_score_model.test_cross_entropy_loss(test)
In [12]:
colley_model = rating_utils.ColleyMethod()
colley_model.fit(train, rating_utils.get_teams(data))
print colley_model.ratings
plt.hist(colley_model.ratings)
pass
We now compute the prediction accuracy and cross-entropy loss of the Colley model on the training and test sets.
In [13]:
print "Training and testing accuracy:"
print colley_model.test_accuracy(train), colley_model.test_accuracy(test)
print "\nTraining and testing cross entropy loss:"
print colley_model.test_cross_entropy_loss(train), colley_model.test_cross_entropy_loss(test)