Game Classifier
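
This notebook trains scikit-learn classifiers to predict a game's outcome (win, loss, or draw) from the 42 board-cell features a1 through g6.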


In [1]:
%matplotlib inline

import os

import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('game/game.csv')

In [3]:
data.dtypes


Out[3]:
a1         object
a2         object
a3         object
a4         object
a5         object
a6         object
b1         object
b2         object
b3         object
b4         object
b5         object
b6         object
c1         object
c2         object
c3         object
c4         object
c5         object
c6         object
d1         object
d2         object
d3         object
d4         object
d5         object
d6         object
e1         object
e2         object
e3         object
e4         object
e5         object
e6         object
f1         object
f2         object
f3         object
f4         object
f5         object
f6         object
g1         object
g2         object
g3         object
g4         object
g5         object
g6         object
outcome    object
dtype: object

In [4]:
data.describe()


Out[4]:
a1 a2 a3 a4 a5 a6 b1 b2 b3 b4 ... f4 f5 f6 g1 g2 g3 g4 g5 g6 outcome
count 67557 67557 67557 67557 67557 67557 67557 67557 67557 67557 ... 67557 67557 67557 67557 67557 67557 67557 67557 67557 67557
unique 3 3 3 3 3 3 3 3 3 3 ... 3 3 3 3 3 3 3 3 3 3
top b b b b b b x b b b ... b b b b b b b b b win
freq 24982 43385 55333 61616 65265 67040 25889 41180 54352 61206 ... 64839 66819 67469 29729 48104 58869 64301 66710 67465 44473

4 rows × 43 columns


In [5]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y = encoder.fit_transform(data['outcome'])
print(y)


[2 2 2 ..., 1 0 0]
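
The integer codes index `classes_` in alphabetical order, and `inverse_transform` recovers the original strings. A quick check, left as an unexecuted cell:

In [ ]:
print(encoder.classes_)                  # ['draw' 'loss' 'win']
print(encoder.inverse_transform(y[:3]))  # the first three rows are all wins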

In [6]:
data.columns


Out[6]:
Index(['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6',
       'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'd1', 'd2', 'd3', 'd4', 'd5', 'd6',
       'e1', 'e2', 'e3', 'e4', 'e5', 'e6', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6',
       'g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'outcome'],
      dtype='object')

In [7]:
from sklearn.base import BaseEstimator, TransformerMixin 

class DataEncoder(BaseEstimator, TransformerMixin):
    """Label-encode every column of a DataFrame with its own LabelEncoder."""

    def fit(self, X, y=None):
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "This transformer only knows how to handle data frames!"
            )

        # One encoder per column, so each column keeps its own vocabulary.
        self.encoders = [
            LabelEncoder().fit(X[column])
            for column in X.columns
        ]
        return self

    def transform(self, X):
        # Work on a copy so the caller's frame is not mutated in place;
        # columns are assumed to arrive in the same order they were fitted.
        X = X.copy()
        for idx, column in enumerate(X.columns):
            X[column] = self.encoders[idx].transform(X[column])
        return X

    def inverse_transform(self, X):
        X = X.copy()
        for idx, column in enumerate(X.columns):
            X[column] = self.encoders[idx].inverse_transform(X[column])
        return X
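
As a quick sanity check, the transformer should round-trip a small frame. A sketch with hypothetical marker values (describe() above shows three unique values per cell, with 'b' and 'x' visible; 'o' is assumed for the third):

In [ ]:
demo = pd.DataFrame({"a1": ["x", "o", "b"], "a2": ["b", "b", "x"]})

enc = DataEncoder()
codes = enc.fit_transform(demo)          # one integer encoding per column
restored = enc.inverse_transform(codes)  # back to the original strings

assert restored.equals(demo)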

In [8]:
X = data[[
    "a1", "a2", "a3", "a4", "a5", "a6",
    "b1", "b2", "b3", "b4", "b5", "b6",
    "c1", "c2", "c3", "c4", "c5", "c6",
    "d1", "d2", "d3", "d4", "d5", "d6",
    "e1", "e2", "e3", "e4", "e5", "e6",
    "f1", "f2", "f3", "f4", "f5", "f6",
    "g1", "g2", "g3", "g4", "g5", "g6",
]]

y = data["outcome"]

In [17]:
Xencoder = DataEncoder()
yencoder = LabelEncoder() 
X = Xencoder.fit_transform(X)
y = yencoder.fit_transform(y)

In [19]:
from sklearn.model_selection import train_test_split as tts 

X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)
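
The split above is different on every run; for repeatable results, and to keep the imbalanced outcome proportions intact in both halves, a seed and stratification can be passed. A sketch (the seed value is arbitrary):

In [ ]:
X_train, X_test, y_train, y_test = tts(
    X, y,
    test_size=0.20,
    random_state=42,  # arbitrary seed for a reproducible split
    stratify=y,       # preserve win/loss/draw proportions in both halves
)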

In [23]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
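
Six classifiers are imported, but only the naive Bayes variants are evaluated below. A comparison loop over the others might look like the following sketch (SVC is left out because it can be very slow on ~54,000 training rows):

In [ ]:
from sklearn.model_selection import cross_val_score

candidates = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    GaussianNB(),
    LogisticRegression(),
]

# Mean 3-fold cross-validation accuracy on the training split.
for clf in candidates:
    scores = cross_val_score(clf, X_train, y_train, cv=3)
    print(clf.__class__.__name__, scores.mean())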

In [31]:
model = GaussianNB()

In [32]:
model.fit(X_train, y_train)


Out[32]:
GaussianNB(priors=None)

In [33]:
model.score(X_test, y_test)


Out[33]:
0.61108644168146831

In [28]:
from sklearn.metrics import classification_report

In [34]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=yencoder.classes_))


             precision    recall  f1-score   support

       draw       0.13      0.10      0.12      1307
       loss       0.38      0.20      0.26      3258
        win       0.69      0.83      0.76      8947

avg / total       0.56      0.61      0.58     13512
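
The model leans heavily on the majority 'win' class (8947 of 13512 test rows) and recovers only 10% of draws. A confusion matrix makes the error pattern explicit; a sketch:

In [ ]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predictions,
# both ordered as yencoder.classes_: draw, loss, win.
print(confusion_matrix(y_test, y_pred))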


In [35]:
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=yencoder.classes_))


0.636693309651
             precision    recall  f1-score   support

       draw       0.16      0.10      0.12      1307
       loss       0.42      0.04      0.07      3258
        win       0.67      0.93      0.78      8947

avg / total       0.56      0.64      0.55     13512
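
One caveat applies to both naive Bayes runs: label encoding imposes an arbitrary numeric ordering on the board markers, which GaussianNB then treats as continuous values and MultinomialNB as counts. A one-hot representation usually suits categorical cells better; a sketch using pandas (the copying transform above leaves `data` holding the raw strings, so it can be re-encoded directly):

In [ ]:
# One-hot encode the 42 board cells instead of label-encoding them.
X_onehot = pd.get_dummies(data.drop(columns="outcome"))

Xh_train, Xh_test, yh_train, yh_test = tts(
    X_onehot, y, test_size=0.20, random_state=42
)

model = MultinomialNB()
model.fit(Xh_train, yh_train)
print(model.score(Xh_test, yh_test))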