Recomend Better Postion Who Have Multy Position Player

- Using Decision Tree, Gaussian and Ensemble

Index

Connect DB and Make QUERY
Make Pandas DataFrame Each Position Player
Set Position Category and Concat Each Datafream
Make Training and Test Data
Make Decision Tree Classifier Model
Check Confusion Matrix
Cecck Classification Report
Recomend Position

Import Package



In [2]:

    
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import MySQLdb

from sklearn.tree import export_graphviz
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

1. Connect DB and Make QUERY



In [3]:

    
db = MySQLdb.connect(
    "db.fastcamp.us",
    "root",
    "dkstncks",
    "football",
    charset='utf8',
)

def make_query(position):
    """
    
    parameter------------
    position : M, D, F, G
    
    return---------------
    SQL_QUERY String
    
    """
    
    SQL_QUERY = """
        SELECT *
        FROM player
    """
    
    if position == "F":
        SQL_QUERY += """
            WHERE position not like "%,%" and position like "%FW%" and mins > 270
        """
    
    if position == "M":
        SQL_QUERY += """
            WHERE position not like "%,%" and position like "%M%" and mins > 270
        """
    
    if position == "D":
        SQL_QUERY += """
            WHERE position not like "%,%" and position like "%D%" and position not like " DMC"  and mins > 270
        """
    
    if position == "G":
        SQL_QUERY += """
            WHERE position not like "%,%" and position like "%G%" and mins > 270
        """
    
    return SQL_QUERY

2. Make Pandas DataFrame Each Position Player



In [4]:

    
# forword
SQL_QUERY = make_query("F")
forword_df = pd.read_sql(SQL_QUERY, db)

# midfilder
SQL_QUERY = make_query("M")
midfilder_df = pd.read_sql(SQL_QUERY, db)

# defencer
SQL_QUERY = make_query("D")
defencer_df = pd.read_sql(SQL_QUERY, db)

# goalkeeper
SQL_QUERY = make_query("G")
goalkeeper_df = pd.read_sql(SQL_QUERY, db)

len(forword_df), len(midfilder_df), len(defencer_df), len(goalkeeper_df)









    Out[4]:





(291, 997, 817, 213)

3. Set Position Category and Concat Each Datafream



In [5]:

    
forword_df["position"] = 0
forword_df

midfilder_df["position"] = 1
midfilder_df

defencer_df["position"] = 2
defencer_df

goalkeeper_df["position"] = 3
goalkeeper_df

concated_df = pd.concat([forword_df, midfilder_df, defencer_df, goalkeeper_df])
concated_df.tail()









    Out[5]:






  
    
      
      tall
      weight
      apps_sub
      mins
      goals
      assists
      spg
      ps_x
      motm
      aw
      ...
      clear
      drb
      owng
      keyp_x
      fouled
      off
      disp
      unstch
      avgp
      position
    
  
  
    
      208
      187
      82
      0
      2430
      0
      0
      0.0
      59.7
      0
      0.5
      ...
      1.2
      0.0
      0
      0.0
      0.3
      0.0
      0.0
      0.1
      25.5
      3
    
    
      209
      198
      83
      2
      1397
      0
      0
      0.0
      47.0
      0
      0.4
      ...
      1.1
      0.1
      0
      0.0
      0.2
      0.0
      0.0
      0.0
      19.8
      3
    
    
      210
      199
      91
      0
      2020
      0
      0
      0.0
      48.2
      0
      0.3
      ...
      1.1
      0.0
      0
      0.0
      0.0
      0.0
      0.0
      0.0
      22.7
      3
    
    
      211
      188
      78
      0
      2970
      0
      0
      0.0
      54.2
      0
      0.2
      ...
      0.8
      0.1
      0
      0.0
      0.2
      0.0
      0.0
      0.0
      28.9
      3
    
    
      212
      185
      81
      0
      450
      0
      0
      0.0
      62.8
      0
      0.2
      ...
      0.8
      0.0
      0
      0.2
      0.0
      0.0
      0.0
      0.0
      27.4
      3
    
  

5 rows × 23 columns

4. Make Training and Test Data



In [6]:

    
X_train, X_test, y_train, y_test = train_test_split(concated_df.ix[:,:-1], concated_df.ix[:,-1], test_size=0.2, random_state=1)

5. Make Decision Tree Classifier Model



In [7]:

    
from sklearn.tree import DecisionTreeClassifier
model_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=3).fit(X_train, y_train)



In [8]:

    
model_gini = DecisionTreeClassifier(criterion='gini', max_depth=3).fit(X_train, y_train)



In [9]:

    
from sklearn.naive_bayes import GaussianNB
model_gaussian = GaussianNB().fit(X_train, y_train)



In [10]:

    
from sklearn.ensemble import VotingClassifier
clf1 = DecisionTreeClassifier(criterion='entropy', max_depth=3)
clf2 = DecisionTreeClassifier(criterion='gini', max_depth=3)
clf3 = GaussianNB()
eclf = VotingClassifier(estimators=[('entropy', clf1), ('gini', clf2), ('naive', clf3)], voting='soft', weights=[2, 1, 1])
model_ensemble = eclf.fit(X_train, y_train)

6. Check Confusion Matrix



In [20]:

    
cm_entropy = confusion_matrix(y_test, model_entropy.predict(X_test))
cm_gini = confusion_matrix(y_test, model_gini.predict(X_test))
cm_gaussian = confusion_matrix(y_test, model_gaussian.predict(X_test))
cm_ensemble = confusion_matrix(y_test, model_ensemble.predict(X_test))

print("entropy"+"="*12)
print(cm_entropy)
print("gini"+"="*15)
print(cm_gini)
print("gaussian"+"="*11)
print(cm_gaussian)
print("ensemble"+"="*11)
print(cm_ensemble)









    



entropy============
[[ 50   1   0   1]
 [ 41 163  16   0]
 [  0  20 138   0]
 [  0   0   0  34]]
gini===============
[[ 46   5   1   0]
 [ 33 175  12   0]
 [  0  23 134   1]
 [  0   0   0  34]]
gaussian===========
[[ 50   2   0   0]
 [ 22 167  31   0]
 [  1  15 142   0]
 [  0   0   0  34]]
ensemble===========
[[ 49   2   1   0]
 [ 22 183  14   1]
 [  0  20 138   0]
 [  0   0   0  34]]

7. Cecck Classification Report



In [22]:

    
print("entropy"+"="*50)
print(classification_report(y_test, model_entropy.predict(X_test)))

print("gini"+"="*50)
print(classification_report(y_test, model_gini.predict(X_test)))

print("gaussian"+"="*50)
print(classification_report(y_test, model_gaussian.predict(X_test)))

print("ensemble"+"="*50)
print(classification_report(y_test, model_ensemble.predict(X_test)))









    



entropy==================================================
             precision    recall  f1-score   support

          0       0.55      0.96      0.70        52
          1       0.89      0.74      0.81       220
          2       0.90      0.87      0.88       158
          3       0.97      1.00      0.99        34

avg / total       0.86      0.83      0.83       464

gini==================================================
             precision    recall  f1-score   support

          0       0.58      0.88      0.70        52
          1       0.86      0.80      0.83       220
          2       0.91      0.85      0.88       158
          3       0.97      1.00      0.99        34

avg / total       0.86      0.84      0.84       464

gaussian==================================================
             precision    recall  f1-score   support

          0       0.68      0.96      0.80        52
          1       0.91      0.76      0.83       220
          2       0.82      0.90      0.86       158
          3       1.00      1.00      1.00        34

avg / total       0.86      0.85      0.85       464

ensemble==================================================
             precision    recall  f1-score   support

          0       0.69      0.94      0.80        52
          1       0.89      0.83      0.86       220
          2       0.90      0.87      0.89       158
          3       0.97      1.00      0.99        34

avg / total       0.88      0.87      0.87       464

8. Recomend Position



In [23]:

    
SQL_QUERY = """
    SELECT 
        tall, weight, apps_sub, mins, goals, assists
        , spg, ps_x, motm, aw, tackles, inter, fouls, clear, drb
        , owng, keyp_x, fouled, off, disp, unstch, avgp, position
    FROM player
    WHERE position like "%,%" and mins > 270
    ;
"""

many_position_player_df = pd.read_sql(SQL_QUERY, db)
len(many_position_player_df)









    Out[23]:





586



In [25]:

    
predict_data = model_ensemble.predict(many_position_player_df.ix[:,:-1])
many_position_player_df["recomend_position"] = predict_data



In [26]:

    
# Recomend Result
# 0 : Forword, 1 : Midfilder, 2 : Defencer, 3 : Goalkeeper
many_position_player_df.ix[:10,-2:]









    Out[26]:






  
    
      
      position
      recomend_position
    
  
  
    
      0
      M(CLR),FW
      1
    
    
      1
      D(C),DMC
      1
    
    
      2
      D(LR),M(CR)
      2
    
    
      3
      D(L),M(L)
      1
    
    
      4
      D(C),M(C)
      2
    
    
      5
      D(C),M(C)
      1
    
    
      6
      AM(L),FW
      0
    
    
      7
      AM(CLR),FW
      1
    
    
      8
      D(C),M(CLR)
      1
    
    
      9
      AM(CR),FW
      0
    
    
      10
      AM(CLR),FW
      0



In [ ]:

	tall	weight	apps_sub	mins	ps_x	aw	...	clear	drb	keyp_x	fouled	unstch	avgp	position
208	187	82	0	2430	59.7	0.5	...	1.2	0.0	0.0	0.3	0.1	25.5	3
209	198	83	2	1397	47.0	0.4	...	1.1	0.1	0.0	0.2	0.0	19.8	3
210	199	91	0	2020	48.2	0.3	...	1.1	0.0	0.0	0.0	0.0	22.7	3
211	188	78	0	2970	54.2	0.2	...	0.8	0.1	0.0	0.2	0.0	28.9	3
212	185	81	0	450	62.8	0.2	...	0.8	0.0	0.2	0.0	0.0	27.4	3

	position	recomend_position
0	M(CLR),FW	1
1	D(C),DMC	1
2	D(LR),M(CR)	2
3	D(L),M(L)	1
4	D(C),M(C)	2
5	D(C),M(C)	1
6	AM(L),FW	0
7	AM(CLR),FW	1
8	D(C),M(CLR)	1
9	AM(CR),FW	0
10	AM(CLR),FW	0