Game Classifier
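
This notebook trains scikit-learn classifiers to predict a game's outcome (win, loss, or draw) from the 42 board-cell features a1 through g6.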


In [1]:
%matplotlib inline

import os

import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('game/game.csv')

In [3]:
data.dtypes


Out[3]:
a1         object
a2         object
a3         object
a4         object
a5         object
a6         object
b1         object
b2         object
b3         object
b4         object
b5         object
b6         object
c1         object
c2         object
c3         object
c4         object
c5         object
c6         object
d1         object
d2         object
d3         object
d4         object
d5         object
d6         object
e1         object
e2         object
e3         object
e4         object
e5         object
e6         object
f1         object
f2         object
f3         object
f4         object
f5         object
f6         object
g1         object
g2         object
g3         object
g4         object
g5         object
g6         object
outcome    object
dtype: object

In [4]:
data.describe()


Out[4]:
a1 a2 a3 a4 a5 a6 b1 b2 b3 b4 ... f4 f5 f6 g1 g2 g3 g4 g5 g6 outcome
count 67557 67557 67557 67557 67557 67557 67557 67557 67557 67557 ... 67557 67557 67557 67557 67557 67557 67557 67557 67557 67557
unique 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
top b b b b b b x b b b ... b b b b b b b b b win
freq 24982 43385 55333 61616 65265 67040 25889 41180 54352 61206 ... 64839 66819 67469 29729 48104 58869 64301 66710 67465 44473

4 rows × 43 columns


In [5]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y = encoder.fit_transform(data['outcome'])
print(y)


[2 2 2 ..., 1 0 0]
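
The integer codes index `classes_` in alphabetical order, and `inverse_transform` recovers the original strings. A quick check, left as an unexecuted cell:

In [ ]:
print(encoder.classes_)                  # ['draw' 'loss' 'win']
print(encoder.inverse_transform(y[:3]))  # the first three rows are all wins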

In [6]:
data.columns


Out[6]:
Index(['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6',
       'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'd1', 'd2', 'd3', 'd4', 'd5', 'd6',
       'e1', 'e2', 'e3', 'e4', 'e5', 'e6', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6',
       'g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'outcome'],
      dtype='object')

In [7]:
from sklearn.base import BaseEstimator, TransformerMixin 

class DataEncoder(BaseEstimator, TransformerMixin):
    """Label-encode every column of a DataFrame with its own LabelEncoder."""

    def fit(self, X, y=None):
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "This transformer only knows how to handle data frames!"
            )

        # One encoder per column, so each column keeps its own vocabulary.
        self.encoders = [
            LabelEncoder().fit(X[column])
            for column in X.columns
        ]
        return self

    def transform(self, X):
        # Work on a copy so the caller's frame is not mutated in place;
        # columns are assumed to arrive in the same order they were fitted.
        X = X.copy()
        for idx, column in enumerate(X.columns):
            X[column] = self.encoders[idx].transform(X[column])
        return X

    def inverse_transform(self, X):
        X = X.copy()
        for idx, column in enumerate(X.columns):
            X[column] = self.encoders[idx].inverse_transform(X[column])
        return X
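
As a quick sanity check, the transformer should round-trip a small frame. A sketch with hypothetical marker values (describe() above shows three unique values per cell, with 'b' and 'x' visible; 'o' is assumed for the third):

In [ ]:
demo = pd.DataFrame({"a1": ["x", "o", "b"], "a2": ["b", "b", "x"]})

enc = DataEncoder()
codes = enc.fit_transform(demo)          # one integer encoding per column
restored = enc.inverse_transform(codes)  # back to the original strings

assert restored.equals(demo)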

In [8]:
X = data[[
    "a1", "a2", "a3", "a4", "a5", "a6",
    "b1", "b2", "b3", "b4", "b5", "b6",
    "c1", "c2", "c3", "c4", "c5", "c6",
    "d1", "d2", "d3", "d4", "d5", "d6",
    "e1", "e2", "e3", "e4", "e5", "e6",
    "f1", "f2", "f3", "f4", "f5", "f6",
    "g1", "g2", "g3", "g4", "g5", "g6",
]]

y = data["outcome"]

In [17]:
Xencoder = DataEncoder()
yencoder = LabelEncoder() 
X = Xencoder.fit_transform(X)
y = yencoder.fit_transform(y)

In [19]:
from sklearn.model_selection import train_test_split as tts 

X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)
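
The split above is different on every run; for repeatable results, and to keep the imbalanced outcome proportions intact in both halves, a seed and stratification can be passed. A sketch (the seed value is arbitrary):

In [ ]:
X_train, X_test, y_train, y_test = tts(
    X, y,
    test_size=0.20,
    random_state=42,  # arbitrary seed for a reproducible split
    stratify=y,       # preserve win/loss/draw proportions in both halves
)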

In [23]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
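
Six classifiers are imported, but only the naive Bayes variants are evaluated below. A comparison loop over the others might look like the following sketch (SVC is left out because it can be very slow on ~54,000 training rows):

In [ ]:
from sklearn.model_selection import cross_val_score

candidates = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    GaussianNB(),
    LogisticRegression(),
]

# Mean 3-fold cross-validation accuracy on the training split.
for clf in candidates:
    scores = cross_val_score(clf, X_train, y_train, cv=3)
    print(clf.__class__.__name__, scores.mean())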

In [31]:
model = GaussianNB()

In [32]:
model.fit(X_train, y_train)


Out[32]:
GaussianNB(priors=None)

In [33]:
model.score(X_test, y_test)


Out[33]:
0.61108644168146831

In [28]:
from sklearn.metrics import classification_report

In [34]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=yencoder.classes_))


             precision    recall  f1-score   support

       draw       0.13      0.10      0.12      1307
       loss       0.38      0.20      0.26      3258
        win       0.69      0.83      0.76      8947

avg / total       0.56      0.61      0.58     13512
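
The model leans heavily on the majority 'win' class (8947 of 13512 test rows) and recovers only 10% of draws. A confusion matrix makes the error pattern explicit; a sketch:

In [ ]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predictions,
# both ordered as yencoder.classes_: draw, loss, win.
print(confusion_matrix(y_test, y_pred))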


In [35]:
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=yencoder.classes_))


0.636693309651
             precision    recall  f1-score   support

       draw       0.16      0.10      0.12      1307
       loss       0.42      0.04      0.07      3258
        win       0.67      0.93      0.78      8947

avg / total       0.56      0.64      0.55     13512
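
One caveat applies to both naive Bayes runs: label encoding imposes an arbitrary numeric ordering on the board markers, which GaussianNB then treats as continuous values and MultinomialNB as counts. A one-hot representation usually suits categorical cells better; a sketch using pandas (the copying transform above leaves `data` holding the raw strings, so it can be re-encoded directly):

In [ ]:
# One-hot encode the 42 board cells instead of label-encoding them.
X_onehot = pd.get_dummies(data.drop(columns="outcome"))

Xh_train, Xh_test, yh_train, yh_test = tts(
    X_onehot, y, test_size=0.20, random_state=42
)

model = MultinomialNB()
model.fit(Xh_train, yh_train)
print(model.score(Xh_test, yh_test))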