In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import numpy as np
from numpy import random
# Relative path of the SQLite file; it is resolved against the kernel's
# working directory.
database = 'database.sqlite'
# Open a connection and keep the handle in `conn`.
# NOTE(review): `conn` is not used by any of the cells visible here and is
# never closed -- confirm it is still needed.
conn = sqlite3.connect(database)

In [2]:
def _load_league(league_id):
    """Read one league table from ``./leagues/<league_id>.csv``.

    Parameters
    ----------
    league_id : int or str
        Stem of the CSV file name (presumably the numeric league id --
        confirm against the data export).

    Returns
    -------
    pandas.DataFrame
        The file contents without the 'Unnamed: 0' column.

    Raises
    ------
    KeyError
        If the file has no 'Unnamed: 0' column (same as the original code).
    """
    matches = pd.read_csv('./leagues/{}.csv'.format(league_id))
    # `axis` is passed by keyword: recent pandas releases reject a positional
    # axis argument to DataFrame.drop.
    return matches.drop(['Unnamed: 0'], axis=1)


# One DataFrame per league file; the variable names are kept because later
# cells concatenate them by name.
uk = _load_league(1729)
de = _load_league(7809)
fr = _load_league(4769)
it = _load_league(10257)
es = _load_league(21518)
pt = _load_league(17642)

In [3]:
# Stack the six league tables row-wise (original indices are kept, not reset).
# `data1` holds the es/fr/pt subset; `data` ends up ordered
# uk, de, it, es, fr, pt -- the same order the pairwise concatenations gave.
data1 = pd.concat([es, fr, pt])
data = pd.concat([uk, de, it, data1])



    
import graphlab as gl
#gl.canvas.set_target('ipynb')

In [4]:
# Feature columns used for the first experiment (95 names), grouped by prefix.
ft = [
    # home_* columns
    'home_V', 'home_D', 'home_E',
    'home_GF', 'home_AVG_GF', 'home_GS', 'home_AVG_GS',
    'home_VG', 'home_DG', 'home_EG',
    'home_GFG', 'home_AVG_GFG', 'home_GSG', 'home_AVG_GSG',
    # away_* columns
    'away_V', 'away_D', 'away_E',
    'away_GF', 'away_AVG_GF', 'away_GS', 'away_AVG_GS',
    'away_VG', 'away_DG', 'away_EG',
    'away_GFG', 'away_AVG_GFG', 'away_GSG', 'away_AVG_GSG',
    # h_* buildUpPlay / chanceCreation / defence columns
    'h_buildUpPlaySpeed', 'h_buildUpPlaySpeedClass',
    'h_buildUpPlayDribblingClass',
    'h_buildUpPlayPassing', 'h_buildUpPlayPassingClass',
    'h_buildUpPlayPositioningClass',
    'h_chanceCreationPassing', 'h_chanceCreationPassingClass',
    'h_chanceCreationCrossing', 'h_chanceCreationCrossingClass',
    'h_chanceCreationShooting', 'h_chanceCreationShootingClass',
    'h_chanceCreationPositioningClass',
    'h_defencePressure', 'h_defencePressureClass',
    'h_defenceAggression', 'h_defenceAggressionClass',
    'h_defenceTeamWidth', 'h_defenceTeamWidthClass',
    'h_defenceDefenderLineClass',
    # a_* buildUpPlay / chanceCreation / defence columns
    'a_buildUpPlaySpeed', 'a_buildUpPlaySpeedClass',
    'a_buildUpPlayDribblingClass',
    'a_buildUpPlayPassing', 'a_buildUpPlayPassingClass',
    'a_buildUpPlayPositioningClass',
    'a_chanceCreationPassing', 'a_chanceCreationPassingClass',
    'a_chanceCreationCrossing', 'a_chanceCreationCrossingClass',
    'a_chanceCreationShooting', 'a_chanceCreationShootingClass',
    'a_chanceCreationPositioningClass',
    'a_defencePressure', 'a_defencePressureClass',
    'a_defenceAggression', 'a_defenceAggressionClass',
    'a_defenceTeamWidth', 'a_defenceTeamWidthClass',
    'a_defenceDefenderLineClass',
    # height / weight columns
    'h_avg_height', 'h_avg_weight', 'a_avg_height', 'a_avg_weight',
    # overall / potential / def / mid / att columns
    'h_overall', 'h_potential', 'h_def', 'h_mid', 'h_att',
    'a_overall', 'a_potential', 'a_def', 'a_mid', 'a_att',
    # date, identifier, odds (B365*) and formation columns
    'a_date', 'h_date',
    'id', 'season', 'stage',
    'home_team_api_id', 'away_team_api_id',
    'B365H', 'B365D', 'B365A',
    'formation_h', 'formation_a',
    'league_id',
]

# Z-score every feature column in place: subtract the column mean, then
# divide by the column standard deviation. Statistics are taken over the
# whole of `data`.
for feature_name in ft:
    feature_values = data[feature_name]
    centered = feature_values - feature_values.mean()
    data[feature_name] = centered / feature_values.std()

# Convert the normalised frame to a GraphLab SFrame and open it in Canvas.
data_SFrame = gl.SFrame(data)
data_SFrame.show()


This non-commercial license of GraphLab Create for academic use is assigned to felipeapfernandes@gmail.com and will expire on August 26, 2017.
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1479181384.log
Canvas is accessible via web browser at the URL: http://localhost:39621/index.html
Opening Canvas in default web browser.

In [5]:
# First experiment: 10-fold cross-validation of a logistic classifier that
# predicts 'Output' from every column listed in `ft`.
folds = gl.cross_validation.KFold(data_SFrame, 10)
model_kfolds = []   # one fitted model per fold
results = []        # one evaluation result per fold
for train, valid in folds:
    # The held-out split doubles as the validation set, so the training log
    # reports validation accuracy at each iteration.
    model = gl.logistic_classifier.create(
        train,
        target='Output',
        features=ft,
        validation_set=valid
    )
    model_kfolds.append(model)

    fold_metrics = model.evaluate(valid)
    results.append(fold_metrics)


Logistic regression:
--------------------------------------------------------
Number of examples          : 7911
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 1.356953     | 0.536595          | 0.509091            |
| 2         | 3        | 1.562225     | 0.540892          | 0.506818            |
| 3         | 4        | 1.773969     | 0.541777          | 0.507955            |
| 4         | 5        | 2.002923     | 0.541777          | 0.507955            |
| 5         | 6        | 2.242849     | 0.541777          | 0.507955            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.380872     | 0.535389          | 0.525597            |
| 2         | 3        | 0.589228     | 0.536906          | 0.529010            |
| 3         | 4        | 0.797453     | 0.537791          | 0.534699            |
| 4         | 5        | 1.027924     | 0.537664          | 0.535836            |
| 5         | 6        | 1.233291     | 0.537664          | 0.535836            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.366072     | 0.533873          | 0.513083            |
| 2         | 3        | 0.568330     | 0.539055          | 0.511945            |
| 3         | 4        | 0.829072     | 0.539434          | 0.515358            |
| 4         | 5        | 1.029115     | 0.538928          | 0.515358            |
| 5         | 6        | 1.236247     | 0.538928          | 0.515358            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.356487     | 0.534505          | 0.503982            |
| 2         | 3        | 0.589495     | 0.537159          | 0.493743            |
| 3         | 4        | 0.806158     | 0.537664          | 0.494881            |
| 4         | 5        | 1.031804     | 0.537285          | 0.494881            |
| 5         | 6        | 1.279414     | 0.537285          | 0.494881            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.346609     | 0.537664          | 0.515358            |
| 2         | 3        | 0.552021     | 0.538928          | 0.519909            |
| 3         | 4        | 0.752601     | 0.539687          | 0.519909            |
| 4         | 5        | 1.017201     | 0.539813          | 0.519909            |
| 5         | 6        | 1.225616     | 0.539813          | 0.519909            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.366294     | 0.532861          | 0.535836            |
| 2         | 3        | 0.589317     | 0.538549          | 0.534699            |
| 3         | 4        | 0.803341     | 0.538423          | 0.536974            |
| 4         | 5        | 1.012268     | 0.538549          | 0.536974            |
| 5         | 6        | 1.213651     | 0.538675          | 0.536974            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.358980     | 0.533999          | 0.562002            |
| 2         | 3        | 0.556278     | 0.534884          | 0.557452            |
| 3         | 4        | 0.763372     | 0.535642          | 0.558589            |
| 4         | 5        | 0.976683     | 0.535516          | 0.558589            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.382097     | 0.534631          | 0.503982            |
| 2         | 3        | 0.596873     | 0.536400          | 0.488055            |
| 3         | 4        | 0.805270     | 0.537032          | 0.488055            |
| 4         | 5        | 1.014437     | 0.537159          | 0.488055            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.365843     | 0.541330          | 0.492605            |
| 2         | 3        | 0.565431     | 0.543731          | 0.486917            |
| 3         | 4        | 0.778347     | 0.543352          | 0.489192            |
| 4         | 5        | 0.986783     | 0.543225          | 0.489192            |
| 5         | 6        | 1.189243     | 0.543225          | 0.489192            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 95
Number of unpacked features : 95
Number of coefficients    : 192
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.354380     | 0.533241          | 0.562002            |
| 2         | 3        | 0.562896     | 0.534631          | 0.562002            |
| 3         | 4        | 0.766794     | 0.533873          | 0.558589            |
| 4         | 5        | 0.966945     | 0.533873          | 0.558589            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.


In [6]:
# Mean accuracy over the per-fold evaluation results collected in `results`.
soma = 0
for fold_metrics in results:
    soma += fold_metrics['accuracy']

print(soma / len(results))


0.520533793567

In [7]:
# Reduced feature list for the second experiment (55 names): the home_* /
# away_* columns plus height/weight, rating, date, identifier, odds and
# formation columns. This rebinds the module-level name `ft`.
ft = [
    # home_* columns
    'home_V', 'home_D', 'home_E',
    'home_GF', 'home_AVG_GF', 'home_GS', 'home_AVG_GS',
    'home_VG', 'home_DG', 'home_EG',
    'home_GFG', 'home_AVG_GFG', 'home_GSG', 'home_AVG_GSG',
    # away_* columns
    'away_V', 'away_D', 'away_E',
    'away_GF', 'away_AVG_GF', 'away_GS', 'away_AVG_GS',
    'away_VG', 'away_DG', 'away_EG',
    'away_GFG', 'away_AVG_GFG', 'away_GSG', 'away_AVG_GSG',
    # height / weight columns
    'h_avg_height', 'h_avg_weight', 'a_avg_height', 'a_avg_weight',
    # overall / potential / def / mid / att columns
    'h_overall', 'h_potential', 'h_def', 'h_mid', 'h_att',
    'a_overall', 'a_potential', 'a_def', 'a_mid', 'a_att',
    # date, identifier, odds (B365*) and formation columns
    'a_date', 'h_date',
    'id', 'season', 'stage',
    'home_team_api_id', 'away_team_api_id',
    'B365H', 'B365D', 'B365A',
    'formation_h', 'formation_a',
    'league_id',
]

# Second experiment: the same 10-fold procedure, now using the `ft` list as
# it is bound at this point in the notebook. The fold object and both
# accumulators are rebuilt from scratch.
folds = gl.cross_validation.KFold(data_SFrame, 10)
model_kfolds = []   # one fitted model per fold
results = []        # one evaluation result per fold
for train, valid in folds:
    # Fit on the training split and log accuracy on the held-out split.
    model = gl.logistic_classifier.create(
        train,
        target='Output',
        features=ft,
        validation_set=valid
    )
    model_kfolds.append(model)

    fold_metrics = model.evaluate(valid)
    results.append(fold_metrics)


Logistic regression:
--------------------------------------------------------
Number of examples          : 7911
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.161007     | 0.536721          | 0.512500            |
| 2         | 3        | 0.255862     | 0.534572          | 0.514773            |
| 3         | 4        | 0.351397     | 0.535457          | 0.515909            |
| 4         | 5        | 0.452207     | 0.535204          | 0.517045            |
| 5         | 6        | 0.551849     | 0.535204          | 0.517045            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.156893     | 0.535010          | 0.526735            |
| 2         | 3        | 0.267671     | 0.534757          | 0.521047            |
| 3         | 4        | 0.371214     | 0.534125          | 0.519909            |
| 4         | 5        | 0.476403     | 0.533999          | 0.519909            |
| 5         | 6        | 0.582880     | 0.533999          | 0.519909            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.154308     | 0.535895          | 0.526735            |
| 2         | 3        | 0.260083     | 0.536780          | 0.524460            |
| 3         | 4        | 0.368567     | 0.537032          | 0.524460            |
| 4         | 5        | 0.463313     | 0.537159          | 0.524460            |
| 5         | 6        | 0.560630     | 0.537159          | 0.524460            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.149519     | 0.536021          | 0.523322            |
| 2         | 3        | 0.237150     | 0.536400          | 0.515358            |
| 3         | 4        | 0.348799     | 0.535768          | 0.511945            |
| 4         | 5        | 0.471850     | 0.535768          | 0.511945            |
| 5         | 6        | 0.588431     | 0.535768          | 0.511945            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.167948     | 0.536148          | 0.521047            |
| 2         | 3        | 0.283467     | 0.535137          | 0.526735            |
| 3         | 4        | 0.395898     | 0.535010          | 0.524460            |
| 4         | 5        | 0.513759     | 0.534884          | 0.524460            |
| 5         | 6        | 0.613808     | 0.534884          | 0.524460            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.146352     | 0.531092          | 0.547213            |
| 2         | 3        | 0.265118     | 0.532735          | 0.550626            |
| 3         | 4        | 0.365567     | 0.532735          | 0.550626            |
| 4         | 5        | 0.474996     | 0.532861          | 0.550626            |
| 5         | 6        | 0.591016     | 0.532861          | 0.549488            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.171894     | 0.530207          | 0.558589            |
| 2         | 3        | 0.263297     | 0.531850          | 0.559727            |
| 3         | 4        | 0.358609     | 0.530839          | 0.559727            |
| 4         | 5        | 0.456416     | 0.530966          | 0.559727            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.207028     | 0.535010          | 0.517634            |
| 2         | 3        | 0.356642     | 0.535895          | 0.514221            |
| 3         | 4        | 0.498061     | 0.535768          | 0.514221            |
| 4         | 5        | 0.605413     | 0.535895          | 0.515358            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.178819     | 0.537159          | 0.494881            |
| 2         | 3        | 0.294510     | 0.536780          | 0.503982            |
| 3         | 4        | 0.415218     | 0.537285          | 0.503982            |
| 4         | 5        | 0.532148     | 0.537032          | 0.503982            |
| 5         | 6        | 0.649294     | 0.537032          | 0.503982            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 55
Number of unpacked features : 55
Number of coefficients    : 112
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.164415     | 0.531092          | 0.557452            |
| 2         | 3        | 0.275381     | 0.532356          | 0.558589            |
| 3         | 4        | 0.390789     | 0.532356          | 0.563140            |
| 4         | 5        | 0.534071     | 0.532230          | 0.563140            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.


In [8]:
# Mean accuracy over the per-fold evaluation results currently in `results`.
soma = 0
for fold_metrics in results:
    soma += fold_metrics['accuracy']

print(soma / len(results))


0.528951416899

In [9]:
# Third experiment: a hand-picked 19-column feature subset. `folds` is not
# rebuilt in this cell; the object created by an earlier cell is iterated again.
selected_features = [
    'country_id', 'league_id', 'season', 'stage',
    'home_team_api_id', 'away_team_api_id',
    'B365H', 'B365D', 'B365A',
    'formation_h', 'formation_a',
    'home_GS', 'home_AVG_GS', 'home_AVG_GFG', 'home_AVG_GSG',
    'away_GS', 'away_AVG_GS', 'away_AVG_GFG', 'away_AVG_GSG',
]

model_kfolds = []   # one fitted model per fold
results = []        # one evaluation result per fold
for train, valid in folds:
    # Fit on the training split and log accuracy on the held-out split.
    model = gl.logistic_classifier.create(
        train,
        target='Output',
        features=selected_features,
        validation_set=valid
    )
    model_kfolds.append(model)

    fold_metrics = model.evaluate(valid)
    results.append(fold_metrics)


Logistic regression:
--------------------------------------------------------
Number of examples          : 7911
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.063131     | 0.533055          | 0.503409            |
| 2         | 3        | 0.108973     | 0.537227          | 0.509091            |
| 3         | 4        | 0.146481     | 0.537227          | 0.513636            |
| 4         | 5        | 0.184024     | 0.536721          | 0.513636            |
| 5         | 6        | 0.223455     | 0.536721          | 0.513636            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.063143     | 0.529070          | 0.534699            |
| 2         | 3        | 0.110019     | 0.532482          | 0.538111            |
| 3         | 4        | 0.152256     | 0.533873          | 0.535836            |
| 4         | 5        | 0.196953     | 0.534125          | 0.535836            |
| 5         | 6        | 0.244221     | 0.534125          | 0.535836            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.062985     | 0.531471          | 0.523322            |
| 2         | 3        | 0.117745     | 0.535895          | 0.521047            |
| 3         | 4        | 0.158505     | 0.536653          | 0.517634            |
| 4         | 5        | 0.204130     | 0.536527          | 0.518771            |
| 5         | 6        | 0.260797     | 0.536527          | 0.518771            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.054358     | 0.530966          | 0.523322            |
| 2         | 3        | 0.094997     | 0.533620          | 0.530148            |
| 3         | 4        | 0.138461     | 0.533114          | 0.531286            |
| 4         | 5        | 0.178875     | 0.532861          | 0.531286            |
| 5         | 6        | 0.221650     | 0.532861          | 0.531286            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.059616     | 0.530966          | 0.527873            |
| 2         | 3        | 0.096355     | 0.536021          | 0.521047            |
| 3         | 4        | 0.141825     | 0.535642          | 0.523322            |
| 4         | 5        | 0.187851     | 0.535389          | 0.523322            |
| 5         | 6        | 0.228526     | 0.535389          | 0.523322            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.064054     | 0.528943          | 0.556314            |
| 2         | 3        | 0.113176     | 0.532861          | 0.555176            |
| 3         | 4        | 0.164676     | 0.533493          | 0.554039            |
| 4         | 5        | 0.206489     | 0.533367          | 0.554039            |
| 5         | 6        | 0.251916     | 0.533367          | 0.554039            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.045284     | 0.528564          | 0.556314            |
| 2         | 3        | 0.086631     | 0.530460          | 0.563140            |
| 3         | 4        | 0.125769     | 0.530839          | 0.562002            |
| 4         | 5        | 0.168967     | 0.530966          | 0.562002            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.055677     | 0.531092          | 0.522184            |
| 2         | 3        | 0.095944     | 0.533999          | 0.534699            |
| 3         | 4        | 0.140831     | 0.533493          | 0.533561            |
| 4         | 5        | 0.183527     | 0.533746          | 0.534699            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.046891     | 0.534252          | 0.491468            |
| 2         | 3        | 0.089745     | 0.537791          | 0.500569            |
| 3         | 4        | 0.138729     | 0.538296          | 0.500569            |
| 4         | 5        | 0.190399     | 0.538549          | 0.500569            |
| 5         | 6        | 0.231798     | 0.538549          | 0.500569            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.

Logistic regression:
--------------------------------------------------------
Number of examples          : 7912
Number of classes           : 3
Number of feature columns   : 19
Number of unpacked features : 19
Number of coefficients    : 40
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |
+-----------+----------+--------------+-------------------+---------------------+
| 1         | 2        | 0.055185     | 0.526795          | 0.563140            |
| 2         | 3        | 0.103692     | 0.530713          | 0.563140            |
| 3         | 4        | 0.154131     | 0.531092          | 0.563140            |
| 4         | 5        | 0.200667     | 0.530713          | 0.564278            |
+-----------+----------+--------------+-------------------+---------------------+
SUCCESS: Optimal solution found.


In [10]:
# Average the 'accuracy' entry over everything stored in `results`
# (presumably one evaluation per model trained above -- confirm against the
# cell that builds `results`).
# Entries are fetched by position, so this only requires that `results`
# supports len() and integer indexing.
soma = sum(results[i]['accuracy'] for i in range(len(results)))

# float() forces true division on Python 2 even if the accuracies were ints;
# the parenthesised single-argument print behaves the same on Python 2 and 3.
print(soma / float(len(results)))


0.533843727376

In [ ]:


In [13]:
def plotData(data, label_x, label_y, label_pos, label_neg, classes_label, ax=None):
    """Scatter-plot two columns of ``data``, one marker/colour per class.

    Parameters
    ----------
    data : table-like object (e.g. a pandas DataFrame)
        Must support ``data[column]``, ``data[boolean_mask]`` and
        ``data[column].unique()``.
    label_x, label_y : str
        Columns plotted on the x and y axes; also used as the axis labels.
    label_pos, label_neg : any
        Unused. Kept only so existing positional calls keep working.
    classes_label : str
        Column whose distinct values define the groups; each group gets its
        own scatter series and legend entry.
    ax : matplotlib axes, optional
        Axes to draw on. Defaults to the current pyplot axes (``plt.gca()``).

    Returns
    -------
    None
        The plot is drawn on the axes as a side effect.
    """
    markers = ['+', 'v', '*']
    colors = ['b', 'r', 'k']
    axes = plt.gca() if ax is None else ax

    for idx, cl in enumerate(data[classes_label].unique()):
        subset = data[data[classes_label] == cl]
        # The first three classes get the (marker, colour) pairs '+'/'b',
        # 'v'/'r', '*'/'k'. Beyond that the colour is shifted by one for every
        # full pass over the markers, so up to nine classes stay visually
        # distinct instead of raising IndexError on the fourth.
        marker = markers[idx % len(markers)]
        color = colors[(idx + idx // len(markers)) % len(colors)]
        # Label with the class value itself rather than a one-element array,
        # so the legend reads "x" instead of "[x]".
        axes.scatter(subset[label_x], subset[label_y], marker=marker, c=color,
                     s=60, linewidth=2, label=cl)

    axes.set_xlabel(label_x)
    axes.set_ylabel(label_y)
    axes.grid(True)
    axes.legend(frameon=True, fancybox=True)
    
# Scatter 'home_AVG_GFG' against 'away_AVG_GSG' for every row of `data`,
# grouped by the 'Output' column.
# To inspect a single league instead, filter first, e.g.
# data[data['league_id'] == 1729], and pass that frame to plotData.
x_column = 'home_AVG_GFG'
y_column = 'away_AVG_GSG'
# The 4th and 5th positional arguments (label_pos / label_neg) simply repeat
# the axis columns, exactly as before.
plotData(data, x_column, y_column, x_column, y_column, 'Output')
plt.show()


Series([], Name: home_AVG_GFG, dtype: float64)

In [ ]: