In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import numpy as np
from numpy import random
# Path of the SQLite database file, relative to the notebook's working directory.
database = 'database.sqlite'
# Open a connection to that file.
# NOTE(review): `conn` is not referenced by any other cell visible in this
# notebook and is never closed here -- confirm it is still needed.
conn = sqlite3.connect(database)
In [2]:
def _load_league(league_id):
    """Read ./leagues/<league_id>.csv and return it without the 'Unnamed: 0' column.

    `axis=1` is passed by keyword: recent pandas releases no longer accept the
    axis of `DataFrame.drop` as a second positional argument.
    """
    frame = pd.read_csv('./leagues/%d.csv' % league_id)
    # 'Unnamed: 0' is presumably the index written out by an earlier to_csv
    # call -- TODO confirm against the code that produced these files.
    return frame.drop(['Unnamed: 0'], axis=1)


# One table per league file. The variable names look like country codes;
# the mapping from league id to country is assumed, not checked here.
uk = _load_league(1729)
de = _load_league(7809)
fr = _load_league(4769)
it = _load_league(10257)
es = _load_league(21518)
pt = _load_league(17642)
In [3]:
# Stack the six league tables into a single frame.
# Row order of the result: uk, de, it, es, fr, pt. The original row indexes
# are kept (no ignore_index), so index labels may repeat across leagues.
data1 = pd.concat([pd.concat([es, fr]), pt])
data = pd.concat([pd.concat([pd.concat([uk, de]), it]), data1])

import graphlab as gl
#gl.canvas.set_target('ipynb')
In [4]:
# Columns used as model inputs by the first cross-validation run; every one of
# them is standardised below.
# NOTE(review): the list contains identifier-like columns ('id', the *_api_id
# columns, 'league_id') and '*Class' columns; the arithmetic below assumes all
# of them are numeric -- TODO confirm.
ft = [
    'home_V', 'home_D', 'home_E', 'home_GF', 'home_AVG_GF', 'home_GS',
    'home_AVG_GS', 'home_VG', 'home_DG', 'home_EG', 'home_GFG',
    'home_AVG_GFG', 'home_GSG', 'home_AVG_GSG',
    'away_V', 'away_D', 'away_E', 'away_GF', 'away_AVG_GF', 'away_GS',
    'away_AVG_GS', 'away_VG', 'away_DG', 'away_EG', 'away_GFG',
    'away_AVG_GFG', 'away_GSG', 'away_AVG_GSG',
    'h_buildUpPlaySpeed', 'h_buildUpPlaySpeedClass',
    'h_buildUpPlayDribblingClass', 'h_buildUpPlayPassing',
    'h_buildUpPlayPassingClass', 'h_buildUpPlayPositioningClass',
    'h_chanceCreationPassing', 'h_chanceCreationPassingClass',
    'h_chanceCreationCrossing', 'h_chanceCreationCrossingClass',
    'h_chanceCreationShooting', 'h_chanceCreationShootingClass',
    'h_chanceCreationPositioningClass', 'h_defencePressure',
    'h_defencePressureClass', 'h_defenceAggression',
    'h_defenceAggressionClass', 'h_defenceTeamWidth',
    'h_defenceTeamWidthClass', 'h_defenceDefenderLineClass',
    'a_buildUpPlaySpeed', 'a_buildUpPlaySpeedClass',
    'a_buildUpPlayDribblingClass', 'a_buildUpPlayPassing',
    'a_buildUpPlayPassingClass', 'a_buildUpPlayPositioningClass',
    'a_chanceCreationPassing', 'a_chanceCreationPassingClass',
    'a_chanceCreationCrossing', 'a_chanceCreationCrossingClass',
    'a_chanceCreationShooting', 'a_chanceCreationShootingClass',
    'a_chanceCreationPositioningClass', 'a_defencePressure',
    'a_defencePressureClass', 'a_defenceAggression',
    'a_defenceAggressionClass', 'a_defenceTeamWidth',
    'a_defenceTeamWidthClass', 'a_defenceDefenderLineClass',
    'h_avg_height', 'h_avg_weight', 'a_avg_height', 'a_avg_weight',
    'h_overall', 'h_potential', 'h_def', 'h_mid', 'h_att',
    'a_overall', 'a_potential', 'a_def', 'a_mid', 'a_att',
    'a_date', 'h_date', 'id', 'season', 'stage',
    'home_team_api_id', 'away_team_api_id',
    'B365H', 'B365D', 'B365A',
    'formation_h', 'formation_a', 'league_id',
]

# Standardise each feature column of `data` in place: subtract the column mean
# and divide by the column standard deviation.
# NOTE(review): mean and std are computed over the whole table, i.e. before
# the K-fold split done in later cells, so validation rows contribute to the
# scaling statistics.
for column in ft:
    centered = data[column] - data[column].mean()
    spread = data[column].std()
    # A constant column has a standard deviation of exactly zero; dividing by
    # it would turn the whole column into NaN. Such a column is only centred.
    if spread == 0:
        spread = 1.0
    data[column] = centered / spread

# Hand the standardised table to GraphLab and open its visual summary.
data_SFrame = gl.SFrame(data)
data_SFrame.show()
In [5]:
# 10-fold cross-validation of a logistic classifier trained on the columns in `ft`.
folds = gl.cross_validation.KFold(data_SFrame, 10)
model_kfolds, results = [], []
for train, valid in folds:
    # `valid` is both the validation set given to the trainer and the set the
    # fitted model is evaluated on afterwards.
    model = gl.logistic_classifier.create(
        train,
        target='Output',
        features=ft,
        validation_set=valid,
    )
    model_kfolds.append(model)
    results.append(model.evaluate(valid))
In [6]:
# Mean of the per-fold 'accuracy' values collected in `results`.
# print is written in call form so the cell parses on Python 2 and Python 3.
soma = sum(fold_result['accuracy'] for fold_result in results)
print(soma / len(results))
In [7]:
# Second feature set: this rebinds `ft` to a shorter list that leaves out the
# h_*/a_* buildUpPlay, chanceCreation and defence columns.
ft = [
    'home_V', 'home_D', 'home_E', 'home_GF', 'home_AVG_GF', 'home_GS',
    'home_AVG_GS', 'home_VG', 'home_DG', 'home_EG', 'home_GFG',
    'home_AVG_GFG', 'home_GSG', 'home_AVG_GSG',
    'away_V', 'away_D', 'away_E', 'away_GF', 'away_AVG_GF', 'away_GS',
    'away_AVG_GS', 'away_VG', 'away_DG', 'away_EG', 'away_GFG',
    'away_AVG_GFG', 'away_GSG', 'away_AVG_GSG',
    'h_avg_height', 'h_avg_weight', 'a_avg_height', 'a_avg_weight',
    'h_overall', 'h_potential', 'h_def', 'h_mid', 'h_att',
    'a_overall', 'a_potential', 'a_def', 'a_mid', 'a_att',
    'a_date', 'h_date', 'id', 'season', 'stage',
    'home_team_api_id', 'away_team_api_id',
    'B365H', 'B365D', 'B365A',
    'formation_h', 'formation_a', 'league_id',
]

# Fresh 10-fold split, then one logistic classifier per fold on the reduced list.
folds = gl.cross_validation.KFold(data_SFrame, 10)
model_kfolds, results = [], []
for train, valid in folds:
    model = gl.logistic_classifier.create(
        train,
        target='Output',
        features=ft,
        validation_set=valid,
    )
    model_kfolds.append(model)
    results.append(model.evaluate(valid))
In [8]:
# Mean of the per-fold 'accuracy' values collected in `results`.
# print is written in call form so the cell parses on Python 2 and Python 3.
soma = sum(fold_result['accuracy'] for fold_result in results)
print(soma / len(results))
In [9]:
# Third run: logistic classifier on a hand-picked subset of columns.
# NOTE(review): no new split is created here; the loop iterates the `folds`
# object built in an earlier cell. This assumes a KFold object can be iterated
# more than once -- TODO confirm, otherwise `results` stays empty.
model_kfolds, results = [], []
for train, valid in folds:
    model = gl.logistic_classifier.create(
        train,
        target='Output',
        features=[
            'country_id', 'league_id', 'season', 'stage',
            'home_team_api_id', 'away_team_api_id',
            'B365H', 'B365D', 'B365A',
            'formation_h', 'formation_a',
            'home_GS', 'home_AVG_GS', 'home_AVG_GFG', 'home_AVG_GSG',
            'away_GS', 'away_AVG_GS', 'away_AVG_GFG', 'away_AVG_GSG',
        ],
        validation_set=valid,
    )
    model_kfolds.append(model)
    results.append(model.evaluate(valid))
In [10]:
# Mean of the per-fold 'accuracy' values collected in `results`.
# print is written in call form so the cell parses on Python 2 and Python 3.
soma = sum(fold_result['accuracy'] for fold_result in results)
print(soma / len(results))
In [ ]:
In [13]:
def plotData(data, label_x, label_y, label_pos, label_neg, classes_label):
    """Scatter-plot two columns of `data` on the current axes, one series per class.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the two plotted columns and the class column.
    label_x, label_y : str
        Column names plotted on the x and y axis; also used as axis labels.
    label_pos, label_neg : str
        Unused. Kept so existing positional calls keep working.
    classes_label : str
        Name of the column whose distinct values define the plotted series.

    Returns
    -------
    None
        The function draws on `plt.gca()`; it does not call `plt.show()`.
    """
    markers = ['+', 'v', '*']
    colors = ['b', 'r', 'k']
    axes = plt.gca()
    for i, cl in enumerate(data[classes_label].unique()):
        subset = data[data[classes_label] == cl]
        # The first three classes get (+, b), (v, r), (*, k). Beyond that the
        # marker cycles and the colour is shifted by one per full cycle, so up
        # to nine classes get a distinct marker/colour pair instead of the
        # lookup running off the end of the lists.
        marker = markers[i % len(markers)]
        colour = colors[(i + i // len(markers)) % len(colors)]
        # The legend entry is the class value itself rather than a
        # one-element array of it.
        axes.scatter(subset[label_x], subset[label_y], marker=marker, c=colour,
                     s=60, linewidth=2, label=cl)
    axes.set_xlabel(label_x)
    axes.set_ylabel(label_y)
    axes.grid(True)
    axes.legend(frameon=True, fancybox=True)
# Scatter 'home_AVG_GFG' against 'away_AVG_GSG' for every row of `data`,
# split into one series per distinct value of the 'Output' column.
plotData(
    data,
    label_x='home_AVG_GFG',
    label_y='away_AVG_GSG',
    label_pos='home_AVG_GFG',
    label_neg='away_AVG_GSG',
    classes_label='Output',
)
plt.show()
In [ ]: