In [1]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [2]:
# Load the raw game dataset (path relative to the notebook's working directory).
data = pd.read_csv('game/game.csv')
In [3]:
# Inspect column dtypes before deciding on an encoding strategy.
data.dtypes
Out[3]:
In [4]:
# Summary statistics (for non-numeric columns pandas reports count/unique/top/freq).
data.describe()
Out[4]:
In [5]:
# Quick look at the target as integer codes.
# NOTE(review): both `y` and a fresh target encoder are recreated below
# (cells In [8] and In [17]), so this cell is exploratory only and its
# results are superseded.
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
y = encoder.fit_transform(data['outcome'])
print(y)
In [6]:
# List all columns to pick out the feature set in the next cell.
data.columns
Out[6]:
In [7]:
from sklearn.base import BaseEstimator, TransformerMixin


class DataEncoder(BaseEstimator, TransformerMixin):
    """Column-wise label encoding for a pandas DataFrame.

    ``fit`` learns one ``LabelEncoder`` per column; ``transform`` maps each
    column's values to integer codes and ``inverse_transform`` maps codes
    back to the original labels.

    Encoders are paired with columns *positionally*, so ``transform`` assumes
    it sees the same columns in the same order as ``fit`` did.
    """

    def fit(self, X, y=None):
        """Fit one ``LabelEncoder`` per column of ``X``.

        Parameters
        ----------
        X : pd.DataFrame
            Frame whose columns each get their own encoder.
        y : ignored
            Present for scikit-learn API compatibility.

        Raises
        ------
        TypeError
            If ``X`` is not a DataFrame (column order is what pairs each
            encoder with its column, so a bare array is not supported).
        """
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "This transformer only knows how to handle data frames!"
            )
        self.encoders = [
            LabelEncoder().fit(X[column])
            for column in X.columns
        ]
        return self

    def transform(self, X):
        """Return a *copy* of ``X`` with every column label-encoded.

        The previous implementation wrote into ``X`` in place, silently
        mutating the caller's frame (and raising SettingWithCopyWarning when
        ``X`` was a slice of another frame); encoding a copy keeps the input
        intact and makes downstream cells idempotent.
        """
        X = X.copy()
        for idx, column in enumerate(X.columns):
            X[column] = self.encoders[idx].transform(X[column])
        return X

    def inverse_transform(self, X):
        """Return a *copy* of ``X`` with integer codes mapped back to labels."""
        X = X.copy()
        for idx, column in enumerate(X.columns):
            X[column] = self.encoders[idx].inverse_transform(X[column])
        return X
In [8]:
# Board features: files a..g crossed with ranks 1..6 (42 columns total),
# generated programmatically instead of a hand-typed list.
feature_columns = [f"{col}{row}" for col in "abcdefg" for row in range(1, 7)]
X = data[feature_columns]
y = data["outcome"]
In [17]:
# Encode every feature column and the target to integer codes.
# NOTE(review): this cell is not idempotent — re-executing it re-encodes
# the already-encoded X/y; use Restart & Run All rather than re-running it.
Xencoder = DataEncoder()
yencoder = LabelEncoder()
X = Xencoder.fit_transform(X)
y = yencoder.fit_transform(y)
In [19]:
from sklearn.model_selection import train_test_split as tts

# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split — and therefore every downstream score — reproducible across
# kernel restarts (the original call gave a different split on every run).
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20, random_state=42)
In [23]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
In [31]:
# Baseline classifier: Gaussian naive Bayes.
model = GaussianNB()
In [32]:
# Train on the training split.
model.fit(X_train, y_train)
Out[32]:
In [33]:
# Mean accuracy on the held-out test split.
model.score(X_test, y_test)
Out[33]:
In [28]:
from sklearn.metrics import classification_report
In [34]:
# Per-class precision/recall/F1, with the original label names restored
# via the fitted target encoder.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=yencoder.classes_))
In [35]:
# Compare against multinomial naive Bayes (the encoded features are
# non-negative integer codes, so the multinomial likelihood is applicable).
# NOTE(review): this rebinds `model`, shadowing the GaussianNB fitted above —
# any later cell that uses `model` now gets the multinomial one.
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=yencoder.classes_))