In [2]:
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}
import os
from getpass import getpass

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import MySQLdb
from sklearn.tree import export_graphviz
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
In [3]:
# Open the MySQL connection used by every pd.read_sql call in this notebook.
# Credentials are read from the environment instead of being committed with
# the notebook; if FOOTBALL_DB_PASSWORD is unset the user is prompted for it.
# Host, user and schema keep their previous values as defaults.
db = MySQLdb.connect(
    os.environ.get("FOOTBALL_DB_HOST", "db.fastcamp.us"),
    os.environ.get("FOOTBALL_DB_USER", "root"),
    os.environ.get("FOOTBALL_DB_PASSWORD") or getpass("football DB password: "),
    os.environ.get("FOOTBALL_DB_NAME", "football"),
    charset='utf8',
)
def make_query(position):
    """Build the SELECT statement for players of one position group.

    Parameters
    ----------
    position : str
        Group code: "F" (forward), "M" (midfielder), "D" (defender) or
        "G" (goalkeeper). Any other value adds no WHERE clause, so the
        returned query selects every row of ``player``.

    Returns
    -------
    str
        The SQL text. Each group clause skips rows whose ``position``
        contains a comma and requires ``mins > 270``.
    """
    # NOTE(review): in the "D" clause the pattern " DMC" has a leading space
    # and no wildcards, so it only excludes rows whose position is exactly
    # " DMC" -- confirm this matches how positions are stored.
    where_by_code = (
        ("F", '\nWHERE position not like "%,%" and position like "%FW%" and mins > 270\n'),
        ("M", '\nWHERE position not like "%,%" and position like "%M%" and mins > 270\n'),
        ("D", '\nWHERE position not like "%,%" and position like "%D%" and position not like " DMC" and mins > 270\n'),
        ("G", '\nWHERE position not like "%,%" and position like "%G%" and mins > 270\n'),
    )

    query = "\nSELECT *\nFROM player\n"
    for code, where_clause in where_by_code:
        if position == code:
            query += where_clause
    return query
In [4]:
# Fetch one DataFrame per single-position group, in the order
# forward, midfielder, defender, goalkeeper.
frames_by_code = {}
for code in ("F", "M", "D", "G"):
    SQL_QUERY = make_query(code)
    frames_by_code[code] = pd.read_sql(SQL_QUERY, db)

forword_df = frames_by_code["F"]
midfilder_df = frames_by_code["M"]
defencer_df = frames_by_code["D"]
goalkeeper_df = frames_by_code["G"]

# Row count of each group (shown as the cell output).
len(forword_df), len(midfilder_df), len(defencer_df), len(goalkeeper_df)
Out[4]:
In [5]:
# Replace each frame's "position" column with its integer class label
# (0 forward, 1 midfielder, 2 defender, 3 goalkeeper), then stack the four
# frames into one training table. The frames are modified in place.
labelled_frames = [forword_df, midfilder_df, defencer_df, goalkeeper_df]
for label, frame in enumerate(labelled_frames):
    frame["position"] = label

concated_df = pd.concat(labelled_frames)
concated_df.tail()
Out[5]:
In [6]:
# Hold out 20% of the rows for evaluation. Every column except the last is
# used as a feature and the last column as the target; .iloc makes this
# positional selection explicit (.ix is deprecated and gone in pandas 1.0).
# NOTE(review): this assumes "position" is the final column returned by
# SELECT * -- confirm against the table schema.
X_train, X_test, y_train, y_test = train_test_split(
    concated_df.iloc[:, :-1],
    concated_df.iloc[:, -1],
    test_size=0.2,
    random_state=1,
)
In [7]:
from sklearn.tree import DecisionTreeClassifier

# Depth-3 decision tree using the entropy criterion. random_state fixes how
# ties between equally good splits are broken, so Restart & Run All rebuilds
# the same tree; the seed matches the one used for the train/test split.
model_entropy = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=1,
).fit(X_train, y_train)
In [8]:
# Depth-3 decision tree using the Gini criterion. random_state fixes how
# ties between equally good splits are broken, so Restart & Run All rebuilds
# the same tree; the seed matches the one used for the train/test split.
model_gini = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    random_state=1,
).fit(X_train, y_train)
In [9]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier fitted on the same training split as the
# tree models. fit() returns the estimator itself.
naive_bayes = GaussianNB()
model_gaussian = naive_bayes.fit(X_train, y_train)
In [10]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble of an entropy tree, a Gini tree and Gaussian naive
# Bayes. Soft voting combines the members' predicted class probabilities
# using the weights below, so the entropy tree counts twice as much as each
# of the other two. Both trees are seeded so that re-running the notebook
# reproduces the same ensemble.
clf1 = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=1)
clf2 = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=1)
clf3 = GaussianNB()
eclf = VotingClassifier(
    estimators=[('entropy', clf1), ('gini', clf2), ('naive', clf3)],
    voting='soft',
    weights=[2, 1, 1],
)
model_ensemble = eclf.fit(X_train, y_train)
In [20]:
# Confusion matrix of every fitted model on the held-out split
# (rows are true labels, columns are predicted labels).
cm_entropy, cm_gini, cm_gaussian, cm_ensemble = (
    confusion_matrix(y_test, fitted.predict(X_test))
    for fitted in (model_entropy, model_gini, model_gaussian, model_ensemble)
)

# Print each matrix under a "name=====" header; the rule widths are the
# ones chosen for each name.
for title, rule_width, matrix in (
    ("entropy", 12, cm_entropy),
    ("gini", 15, cm_gini),
    ("gaussian", 11, cm_gaussian),
    ("ensemble", 11, cm_ensemble),
):
    print(title + "=" * rule_width)
    print(matrix)
In [22]:
# Per-class precision / recall / F1 report for every fitted model on the
# held-out split, each under a "name=====" header.
for title, fitted in (
    ("entropy", model_entropy),
    ("gini", model_gini),
    ("gaussian", model_gaussian),
    ("ensemble", model_ensemble),
):
    print(title + "=" * 50)
    print(classification_report(y_test, fitted.predict(X_test)))
In [23]:
# Players listed with several positions (position contains a comma) and
# more than 270 minutes played. The feature columns are listed explicitly
# with "position" last.
SQL_QUERY = (
    "\n"
    "SELECT\n"
    "tall, weight, apps_sub, mins, goals, assists\n"
    ", spg, ps_x, motm, aw, tackles, inter, fouls, clear, drb\n"
    ", owng, keyp_x, fouled, off, disp, unstch, avgp, position\n"
    "FROM player\n"
    'WHERE position like "%,%" and mins > 270\n'
    ";\n"
)
many_position_player_df = pd.read_sql(SQL_QUERY, db)

# Number of multi-position players (cell output).
len(many_position_player_df)
Out[23]:
In [25]:
# Predict a single position class for each multi-position player.
# The features are every queried column except the textual "position".
# Dropping by name (and also dropping "recomend_position" if it already
# exists) keeps this cell safe to re-run: the old positional "[:, :-1]"
# slice would include the string "position" column on a second run, once
# "recomend_position" had become the last column.
feature_frame = many_position_player_df.drop(
    ["position", "recomend_position"], axis=1, errors="ignore"
)
predict_data = model_ensemble.predict(feature_frame)
many_position_player_df["recomend_position"] = predict_data
In [26]:
# Recommendation result
# 0 : Forward, 1 : Midfielder, 2 : Defender, 3 : Goalkeeper
# Show the original position next to the recommended one for the first
# eleven players. The old .ix[:10] slice was label-based on the default
# integer index and therefore included row 10; .iloc[:11] keeps the same rows.
many_position_player_df.iloc[:11, -2:]
Out[26]:
In [ ]: