In [1]:
from operator import or_
import numpy as np
import pandas as pd
from bson import BSON
from pymongo import cursor
from skll.metrics import kappa
from scipy.stats import pearsonr
from sklearn.cluster import MiniBatchKMeans
from sklearn.grid_search import ParameterGrid
from sklearn.naive_bayes import (BernoulliNB,
                                 MultinomialNB)
from sklearn.metrics import (precision_score,
                             f1_score,
                             accuracy_score,
                             confusion_matrix,
                             average_precision_score)
from sklearn.linear_model import (Perceptron,
                                  PassiveAggressiveRegressor)
from sklearn.feature_extraction import DictVectorizer
from src.features import *
from src.datasets import *
from src.mongodb import *
In [2]:
# Running MongoDB on my own personal server (tunneled to localhost:37017
# in this case)
# Example: ssh -N -f -L localhost:37017:localhost:2700 mulhod@pool-108-24-47-200.cmdnnj.fios.verizon.net
host = 'localhost'
port = 37017
db = connect_to_db(host=host, port=port)
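# For reference, a connection helper like connect_to_db can be sketched with
# pymongo directly. The function below is a hypothetical stand-in (the
# database/collection names are assumptions for illustration only; the real
# implementation lives in src.mongodb):
from pymongo import MongoClient

def _connect_to_db_sketch(host='localhost', port=27017):
    # Open a client connection and return a handle to the reviews collection
    # (names here are illustrative, not the package's actual names)
    client = MongoClient(host, port)
    return client['reviews_project']['reviews']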
In [3]:
# Number of training/test reviews across all games
db.count()
Out[3]:
In [4]:
# List games that the database contains data for
! ls ../data/*jsonlines | awk -F/ '{print $NF}'
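# The same listing can also be produced from Python, e.g. with glob
# (equivalent to the shell command above):
from glob import glob
import os
print('\n'.join(os.path.basename(path)
                for path in sorted(glob('../data/*jsonlines'))))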
In [5]:
# Let's get a sense for the kind of data that is contained in each document
# (not including the NLP features, which have to be decoded, anyway)
db.find_one({}, {'nlp_features': 0})
Out[5]:
In [6]:
# Review attributes
print('\n'.join(db.find_one({}, {'nlp_features': 0}).keys()))
In [7]:
print('\n'.join(db.find_one({}, {'nlp_features': 0})['achievement_progress'].keys()))
In [8]:
# Let's also take a look at the NLP features that have been extracted
# from the review and stored in the database
nlp_features = (BSON.decode(db
                            .find_one({}, {'nlp_features': 1, '_id': 0})
                            .get('nlp_features')))
pd.DataFrame([dict(feature=feature, value=value) for feature, value
              in list(nlp_features.items())[:400]])
Out[8]:
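# For reference, the NLP features are stored BSON-encoded, so the decoding
# step above is just the second half of a round trip like this one (toy
# feature dictionary, purely illustrative):
example_features = {'cluster100': 1, 'review_length': 3}
encoded = BSON.encode(example_features)   # the bytes that get stored in MongoDB
decoded = BSON.decode(encoded)            # back to a plain dict
assert decoded == example_features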
In [11]:
# First let's import some code/variables from util.learn (part of this
# package), which will be useful in keeping this notebook clean and
# straightforward
from util.learn import *
In [12]:
# We will use a set of 2 learning algorithms (Perceptron and
# PassiveAggressiveRegressor) with reduced parameter grids
learners = [Perceptron, PassiveAggressiveRegressor]
_DEFAULT_PARAM_GRIDS = {Perceptron: {'alpha': [0.0001, 0.1],
                                     'random_state': [seed]},
                        PassiveAggressiveRegressor: {'C': [0.01, 10.0],
                                                     'random_state': [seed]}}
In [10]:
# We will use Arma 3 as the game to train/evaluate a model for
game = 'Arma_3'
# We will run 5 rounds of learning with 50 new training samples being
# used in each round.
rounds = 5
n_training_samples = 50
# The model will be tested against the same 500 test reviews each time.
n_test_samples = 500
# Each unique set of values in each learner's parameter grid will be
# evaluated (in this case the only parameter that has multiple values
# is 'alpha' for Perceptron and 'C' for PassiveAggressiveRegressor,
# so only 4 experiments will be run in total, but each one will have
# its own 5 rounds)
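# A quick sanity check on that count: ParameterGrid (imported above) can
# enumerate every unique parameter setting in each grid (whether the
# learning code uses ParameterGrid internally is an assumption; this is
# only to confirm that 2 + 2 = 4 settings will be explored)
for learner in learners:
    settings = list(ParameterGrid(_DEFAULT_PARAM_GRIDS[learner]))
    print('{}: {} settings'.format(learner.__name__, len(settings)))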
In [11]:
# Besides the NLP features, we will not use any of the review attributes
# in the model
non_nlp_features = set()
# The attribute that we will be predicting on is the number of hours the
# reviewer played (or, more specifically, the "bin" into which the number
# falls when the whole range is broken down into bins)
y_label = 'total_game_hours_bin'
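# A rough illustration of that kind of binning with made-up bin edges
# (the actual bin boundaries used by the package are not shown here):
hours_played = pd.Series([0.5, 3.0, 12.0, 250.0])
pd.cut(hours_played, bins=[0, 5, 50, np.inf], labels=[1, 2, 3])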
In [12]:
# The objective function we will use to rank the experiments will be
# quadratic weighted kappa
objective = 'qwk'
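# For reference, quadratic weighted kappa can be computed directly with the
# kappa function imported from skll.metrics above (toy labels, purely
# illustrative):
kappa([1, 2, 3, 3], [1, 2, 2, 3], weights='quadratic')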
In [13]:
# Finally, we will also evaluate the majority baseline model to get a
# sense for how the model really performs
In [14]:
# Filter out warnings since there may be a lot of "UndefinedMetricWarning"
# warnings when running IncrementalLearning
import warnings
warnings.filterwarnings("ignore")
In [15]:
Arma_3_inc_learning = \
    IncrementalLearning(db,
                        game,
                        learners,
                        [_DEFAULT_PARAM_GRIDS[learner]
                         for learner in learners],
                        n_training_samples,
                        non_nlp_features,
                        y_label,
                        objective,
                        test_limit=n_test_samples,
                        rounds=rounds,
                        majority_baseline=True)
In [16]:
# Now, let's take a look at the results
# First, we'll see how the majority baseline model performs
# There is a lot of data collected for each model, so it will be necessary
# here to constrain the analysis
# Let's suppose that we only want to see the following attributes:
# accuracy, precision, Pearson's r, quadratic weighted kappa, and
# confusion matrices
Arma_3_inc_learning.majority_baseline_stats.columns
Out[16]:
In [28]:
Arma_3_inc_learning.majority_baseline_stats[['accuracy',
                                             'precision_weighted',
                                             'pearson_r',
                                             'qwk']]
Out[28]:
In [18]:
# As it turns out, quadratic weighted kappa and Pearson's r are not
# meaningful in this case because the majority baseline predictions are
# 100% one label, i.e., they're all the value that occurred most
# frequently
# Accuracy and precision, however, are still informative
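# To see why Pearson's r breaks down here: it is undefined when one of the
# two vectors is constant, which is exactly what happens when every
# prediction is the majority label (toy values below):
pearsonr([1, 1, 1, 1], [1, 2, 3, 2])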
In [19]:
# Let's take a look at the confusion matrix
print(Arma_3_inc_learning
      .majority_baseline_stats
      .printable_confusion_matrix
      .irow(0))
In [21]:
# As you can see, the baseline model predicted 1 in every single instance
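# The same pattern can be reproduced with sklearn's confusion_matrix
# (imported above) on toy labels where every prediction is 1 -- all the
# counts pile up in the first column:
confusion_matrix([1, 2, 3, 1, 2], [1, 1, 1, 1, 1])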
In [22]:
# Now, let's rank the experiments by how well the model did in the last
# round
ranked_experiments = \
    (Arma_3_inc_learning
     .rank_experiments_by_objective(ordering='objective_last_round'))
In [26]:
# Let's find out which experiment did best (this time we'll include a
# little more information than we did for the majority baseline model)
# Here are the possible columns:
print('Experimental attributes:\n\n\t{}'
      .format('\n\t'.join(ranked_experiments[0])))
In [30]:
ranked_experiments[0][['learner',
                       'learning_round',
                       'accuracy',
                       'precision_weighted',
                       'pearson_r',
                       'qwk']]
Out[30]:
In [31]:
# So, it seems that the PassiveAggressiveRegressor takes the top
# prize
# Let's find out what set of parameters was used
ranked_experiments[0].params.irow(0)
Out[31]:
In [32]:
# When 'C' is set to 0.01 (and everything else is default), this learning
# algorithm seems to do best (in this one case, at least)
In [33]:
# Furthermore, we see a nice increase in performance over time from
# learning round #1 through learning round #5, at least as a general
# trend
In [36]:
# Let's see the confusion matrices and how they change over time
for i in range(5):
    print('Round #{}\n{}\n\n'.format(i + 1,
                                     ranked_experiments[0]
                                     .printable_confusion_matrix
                                     .irow(i)))
In [37]:
# Little by little, one can see that the model begins to predict 2s
# and 3s
# Indeed, this is what separates the learned models from the majority
# baseline model
In [52]:
# Lastly, it should be mentioned that the best model does indeed do
# better than the majority baseline model. In terms of accuracy and
# precision, it does markedly better:
print('Precision:\n\n\tmajority baseline model: {}\n\t'
      'learning model: {}'
      .format(Arma_3_inc_learning
              .majority_baseline_stats
              .precision_weighted
              .irow(0),
              ranked_experiments[0]
              .precision_weighted
              .irow(len(ranked_experiments[0]) - 1)))
print('\nAccuracy:\n\n\tmajority baseline model: {}\n\t'
      'learning model: {}'
      .format(Arma_3_inc_learning
              .majority_baseline_stats
              .accuracy
              .irow(0),
              ranked_experiments[0]
              .accuracy
              .irow(len(ranked_experiments[0]) - 1)))