Recommend a Better Position for Players Who Have Multiple Positions

- Using Decision Tree, Gaussian Naive Bayes, and an Ensemble (Voting) classifier

Index

  1. Connect DB and Make QUERY
  2. Make Pandas DataFrame Each Position Player
  3. Set Position Category and Concat Each DataFrame
  4. Make Training and Test Data
  5. Make Decision Tree Classifier Model
  6. Check Confusion Matrix
  7. Check Classification Report
  8. Recommend Position

Import Package


In [2]:
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import MySQLdb

from sklearn.tree import export_graphviz
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
1. Connect DB and Make QUERY

In [3]:
# Open a connection to the `football` MySQL database used by all
# pd.read_sql calls below.
# SECURITY NOTE(review): credentials are hardcoded in the notebook and will
# be committed/shared with it. Move them to environment variables
# (os.environ) or a secrets manager, and rotate this password.
db = MySQLdb.connect(
    "db.fastcamp.us",
    "root",
    "dkstncks",
    "football",
    charset='utf8',
)

def make_query(position):
    """Build a SQL query selecting single-position players of one position group.

    Only players whose `position` column contains no comma (i.e. a single
    listed position) and who played more than 270 minutes are selected.

    Parameters
    ----------
    position : str
        Position group code: "F" (forward), "M" (midfielder),
        "D" (defender) or "G" (goalkeeper).

    Returns
    -------
    str
        A SELECT query string. For an unknown code the unfiltered
        `SELECT * FROM player` query is returned (original behavior kept).
    """
    # NOTE(review): the "D" filter uses `not like " DMC"` — an exact-match
    # pattern with a leading space and no wildcards, which excludes almost
    # nothing; it was probably meant to be `not like "%DMC%"` to drop
    # defensive midfielders. Kept as-is to preserve the original result
    # sets (817 defenders) — confirm against the data before changing.
    where_by_position = {
        "F": 'position not like "%,%" and position like "%FW%" and mins > 270',
        "M": 'position not like "%,%" and position like "%M%" and mins > 270',
        "D": 'position not like "%,%" and position like "%D%" and position not like " DMC"  and mins > 270',
        "G": 'position not like "%,%" and position like "%G%" and mins > 270',
    }

    SQL_QUERY = """
        SELECT *
        FROM player
    """

    if position in where_by_position:
        SQL_QUERY += """
            WHERE {}
        """.format(where_by_position[position])

    return SQL_QUERY
2. Make Pandas DataFrame Each Position Player

In [4]:
# Build one DataFrame per single-position group (forward / midfielder /
# defender / goalkeeper) straight from the generated queries.
forword_df = pd.read_sql(make_query("F"), db)      # forwards
midfilder_df = pd.read_sql(make_query("M"), db)    # midfielders
defencer_df = pd.read_sql(make_query("D"), db)     # defenders
goalkeeper_df = pd.read_sql(make_query("G"), db)   # goalkeepers

len(forword_df), len(midfilder_df), len(defencer_df), len(goalkeeper_df)


Out[4]:
(291, 997, 817, 213)
3. Set Position Category and Concat Each DataFrame

In [5]:
# Encode the target label into the `position` column:
# 0 = forward, 1 = midfielder, 2 = defender, 3 = goalkeeper.
# (The original cell had bare `forword_df`-style expression statements
# between assignments; mid-cell bare expressions display nothing, so that
# dead code is dropped and the labeling is done in one loop.)
for label, position_df in enumerate(
    (forword_df, midfilder_df, defencer_df, goalkeeper_df)
):
    position_df["position"] = label

# Stack the four groups into one table for training.
concated_df = pd.concat([forword_df, midfilder_df, defencer_df, goalkeeper_df])
concated_df.tail()


Out[5]:
tall weight apps_sub mins goals assists spg ps_x motm aw ... clear drb owng keyp_x fouled off disp unstch avgp position
208 187 82 0 2430 0 0 0.0 59.7 0 0.5 ... 1.2 0.0 0 0.0 0.3 0.0 0.0 0.1 25.5 3
209 198 83 2 1397 0 0 0.0 47.0 0 0.4 ... 1.1 0.1 0 0.0 0.2 0.0 0.0 0.0 19.8 3
210 199 91 0 2020 0 0 0.0 48.2 0 0.3 ... 1.1 0.0 0 0.0 0.0 0.0 0.0 0.0 22.7 3
211 188 78 0 2970 0 0 0.0 54.2 0 0.2 ... 0.8 0.1 0 0.0 0.2 0.0 0.0 0.0 28.9 3
212 185 81 0 450 0 0 0.0 62.8 0 0.2 ... 0.8 0.0 0 0.2 0.0 0.0 0.0 0.0 27.4 3

5 rows × 23 columns

4. Make Training and Test Data

In [6]:
# Features = every column except the last; target = the last column
# (the `position` label added above). 80/20 split, fixed seed for
# reproducibility. `.iloc` replaces `.ix`, which was removed in pandas 1.0.
X_train, X_test, y_train, y_test = train_test_split(
    concated_df.iloc[:, :-1],
    concated_df.iloc[:, -1],
    test_size=0.2,
    random_state=1,
)
5. Make Decision Tree Classifier Model

In [7]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree splitting on information gain (entropy), capped at depth 3
# to keep the model small and interpretable.
entropy_tree = DecisionTreeClassifier(criterion='entropy', max_depth=3)
model_entropy = entropy_tree.fit(X_train, y_train)

In [8]:
# Same depth-3 tree, but splitting on Gini impurity for comparison.
gini_tree = DecisionTreeClassifier(criterion='gini', max_depth=3)
model_gini = gini_tree.fit(X_train, y_train)

In [9]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes baseline (no hyperparameters to tune).
gaussian_nb = GaussianNB()
model_gaussian = gaussian_nb.fit(X_train, y_train)

In [10]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble of the three models above; the entropy tree gets
# double weight in the averaged class probabilities.
entropy_member = DecisionTreeClassifier(criterion='entropy', max_depth=3)
gini_member = DecisionTreeClassifier(criterion='gini', max_depth=3)
naive_member = GaussianNB()

eclf = VotingClassifier(
    estimators=[
        ('entropy', entropy_member),
        ('gini', gini_member),
        ('naive', naive_member),
    ],
    voting='soft',
    weights=[2, 1, 1],
)
model_ensemble = eclf.fit(X_train, y_train)
6. Check Confusion Matrix

In [20]:
# Confusion matrix of each model on the held-out test set
# (rows = true class, columns = predicted class; 0=FW, 1=MF, 2=DF, 3=GK).
cm_entropy = confusion_matrix(y_test, model_entropy.predict(X_test))
cm_gini = confusion_matrix(y_test, model_gini.predict(X_test))
cm_gaussian = confusion_matrix(y_test, model_gaussian.predict(X_test))
cm_ensemble = confusion_matrix(y_test, model_ensemble.predict(X_test))

# Print with a uniform 19-character header instead of four copy-pasted
# print pairs with hand-counted "=" padding.
HEADER_WIDTH = 19
for name, cm in [
    ("entropy", cm_entropy),
    ("gini", cm_gini),
    ("gaussian", cm_gaussian),
    ("ensemble", cm_ensemble),
]:
    print(name + "=" * (HEADER_WIDTH - len(name)))
    print(cm)


entropy============
[[ 50   1   0   1]
 [ 41 163  16   0]
 [  0  20 138   0]
 [  0   0   0  34]]
gini===============
[[ 46   5   1   0]
 [ 33 175  12   0]
 [  0  23 134   1]
 [  0   0   0  34]]
gaussian===========
[[ 50   2   0   0]
 [ 22 167  31   0]
 [  1  15 142   0]
 [  0   0   0  34]]
ensemble===========
[[ 49   2   1   0]
 [ 22 183  14   1]
 [  0  20 138   0]
 [  0   0   0  34]]
7. Check Classification Report

In [22]:
# Per-class precision / recall / F1 for every model on the test set.
# One loop replaces four copy-pasted print pairs.
for name, model in [
    ("entropy", model_entropy),
    ("gini", model_gini),
    ("gaussian", model_gaussian),
    ("ensemble", model_ensemble),
]:
    print(name + "=" * 50)
    print(classification_report(y_test, model.predict(X_test)))


entropy==================================================
             precision    recall  f1-score   support

          0       0.55      0.96      0.70        52
          1       0.89      0.74      0.81       220
          2       0.90      0.87      0.88       158
          3       0.97      1.00      0.99        34

avg / total       0.86      0.83      0.83       464

gini==================================================
             precision    recall  f1-score   support

          0       0.58      0.88      0.70        52
          1       0.86      0.80      0.83       220
          2       0.91      0.85      0.88       158
          3       0.97      1.00      0.99        34

avg / total       0.86      0.84      0.84       464

gaussian==================================================
             precision    recall  f1-score   support

          0       0.68      0.96      0.80        52
          1       0.91      0.76      0.83       220
          2       0.82      0.90      0.86       158
          3       1.00      1.00      1.00        34

avg / total       0.86      0.85      0.85       464

ensemble==================================================
             precision    recall  f1-score   support

          0       0.69      0.94      0.80        52
          1       0.89      0.83      0.86       220
          2       0.90      0.87      0.89       158
          3       0.97      1.00      0.99        34

avg / total       0.88      0.87      0.87       464

8. Recommend Position

In [23]:
# Players listed with more than one position (a comma in `position`) and
# more than 270 minutes played — the candidates we want a recommendation for.
# Columns are selected in the same order as the training features, with
# `position` last.
MULTI_POSITION_QUERY = """
    SELECT 
        tall, weight, apps_sub, mins, goals, assists
        , spg, ps_x, motm, aw, tackles, inter, fouls, clear, drb
        , owng, keyp_x, fouled, off, disp, unstch, avgp, position
    FROM player
    WHERE position like "%,%" and mins > 270
    ;
"""

many_position_player_df = pd.read_sql(MULTI_POSITION_QUERY, db)
len(many_position_player_df)


Out[23]:
586

In [25]:
# Predict a position class from every column except the trailing `position`
# label. `.iloc` replaces `.ix`, which was removed in pandas 1.0.
predict_data = model_ensemble.predict(many_position_player_df.iloc[:, :-1])
many_position_player_df["recomend_position"] = predict_data

In [26]:
# Recommendation result
# 0 : Forward, 1 : Midfielder, 2 : Defender, 3 : Goalkeeper
# The removed `.ix[:10, -2:]` sliced row labels inclusively (rows 0-10,
# i.e. 11 rows); `.iloc[:11, -2:]` reproduces exactly that view of the
# listed positions next to the recommended one.
many_position_player_df.iloc[:11, -2:]


Out[26]:
position recomend_position
0 M(CLR),FW 1
1 D(C),DMC 1
2 D(LR),M(CR) 2
3 D(L),M(L) 1
4 D(C),M(C) 2
5 D(C),M(C) 1
6 AM(L),FW 0
7 AM(CLR),FW 1
8 D(C),M(CLR) 1
9 AM(CR),FW 0
10 AM(CLR),FW 0

In [ ]: