In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import numpy as np
from numpy import random

In [2]:
database = 'database.sqlite'
conn = sqlite3.connect(database)

In [3]:
query = """select * from match where home_player_X1 is not null and 'home_player_Y1' is not null 
and  home_player_X2 is not null and 'home_player_Y2' is not null 
and  home_player_X3 is not null and home_player_Y3 is not null 
and  home_player_X4 is not null and home_player_Y4 is not null 
and  home_player_X5 is not null and home_player_Y5 is not null 
and  home_player_X6 is not null and home_player_Y6 is not null 
and  home_player_X7 is not null and home_player_Y7 is not null 
and  home_player_X8 is not null and home_player_Y8 is not null 
and  home_player_X9 is not null and home_player_Y9 is not null 
and  home_player_X10 is not null and home_player_Y10 is not null 
and  home_player_X11 is not null and home_player_Y11 is not null  
and  away_player_X1 is not null and away_player_Y1 is not null 
and  away_player_X2 is not null and away_player_Y2 is not null 
and  away_player_X3 is not null and away_player_Y3 is not null 
and  away_player_X4 is not null and away_player_Y4 is not null 
and  away_player_X5 is not null and away_player_Y5 is not null 
and  away_player_X6 is not null and away_player_Y6 is not null 
and  away_player_X7 is not null and away_player_Y7 is not null 
and  away_player_X8 is not null and away_player_Y8 is not null 
and  away_player_X9 is not null and away_player_Y9 is not null 
and  away_player_X10 is not null and away_player_Y10 is not null 
and  away_player_X11 is not null and away_player_Y11 is not null 
and home_team_goal is not null and away_team_goal is not null
and home_player_1 is not null 
and home_player_2 is not null  
and home_player_3 is not null  
and home_player_4 is not null  
and home_player_5 is not null  
and home_player_6 is not null  
and home_player_7 is not null  
and home_player_8 is not null 
and home_player_9 is not null  
and home_player_10 is not null  
and home_player_11 is not null  
and away_player_1 is not null  
and away_player_2 is not null  
and away_player_3 is not null  
and away_player_4 is not null  
and away_player_5 is not null  
and away_player_6 is not null  
and away_player_7 is not null  
and away_player_8 is not null  
and away_player_9 is not null  
and away_player_10 is not null  
and away_player_11 is not null  
and  B365H is not null and B365D is not null and B365A is not null;"""
matches = pd.read_sql(query, conn)
matches


Out[3]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... SJA VCH VCD VCA GBH GBD GBA BSH BSD BSA
0 146 1 1 2008/2009 24 2009-02-27 00:00:00 493017 8203 9987 2 ... 2.30 2.65 3.25 2.35 2.90 3.25 2.30 2.80 3.20 2.25
1 154 1 1 2008/2009 25 2009-03-08 00:00:00 493025 9984 8342 1 ... 2.25 2.65 3.20 2.35 2.90 3.20 2.30 2.62 3.20 2.38
2 156 1 1 2008/2009 25 2009-03-07 00:00:00 493027 8635 10000 2 ... 8.50 1.30 4.35 8.00 1.35 4.33 8.50 1.36 4.20 7.00
3 163 1 1 2008/2009 26 2009-03-13 00:00:00 493034 8203 8635 2 ... 1.73 4.35 3.30 1.75 4.50 3.40 1.75 4.20 3.30 1.75
4 169 1 1 2008/2009 26 2009-03-14 00:00:00 493040 10000 9999 0 ... 5.00 1.65 3.50 4.50 1.65 3.50 5.00 1.70 3.40 4.33
5 174 1 1 2008/2009 27 2009-03-22 00:00:00 493045 9991 10000 1 ... 4.75 1.60 3.40 5.00 1.65 3.40 5.00 1.62 3.50 5.00
6 177 1 1 2008/2009 27 2009-03-21 00:00:00 493048 9999 8203 1 ... 2.75 2.25 3.25 2.80 2.10 3.25 3.15 2.25 3.20 2.75
7 190 1 1 2008/2009 29 2009-04-12 00:00:00 493061 8635 8342 1 ... 4.75 1.65 3.40 5.00 1.70 3.40 4.50 1.73 3.40 4.20
8 191 1 1 2008/2009 29 2009-04-10 00:00:00 493062 9999 9987 1 ... 2.15 2.80 3.25 2.25 3.20 3.20 2.10 2.80 3.20 2.25
9 220 1 1 2008/2009 31 2009-04-26 00:00:00 493082 9999 9991 1 ... 2.20 3.20 3.20 2.10 3.00 3.25 2.15 2.88 3.25 2.20
10 227 1 1 2008/2009 32 2009-05-02 00:00:00 493089 10000 9985 0 ... 1.62 5.00 3.50 1.60 5.25 3.50 1.60 5.00 3.60 1.57
11 230 1 1 2008/2009 32 2009-05-02 00:00:00 493092 9991 9984 2 ... 8.50 1.35 4.00 8.50 1.35 4.50 7.00 1.33 4.50 7.50
12 232 1 1 2008/2009 32 2009-05-02 00:00:00 493094 10001 9999 1 ... 5.50 1.55 3.50 5.50 1.57 3.60 5.25 1.57 3.60 5.00
13 235 1 1 2008/2009 33 2009-05-09 00:00:00 493097 9985 8342 2 ... 7.50 1.45 3.80 6.50 1.47 3.75 6.50 1.50 3.75 5.50
14 241 1 1 2008/2009 33 2009-05-09 00:00:00 493103 8635 9999 3 ... 16.00 1.14 6.50 13.00 1.15 6.50 13.00 1.17 6.00 13.00
15 243 1 1 2008/2009 33 2009-05-09 00:00:00 493105 9984 10001 2 ... 2.63 2.60 3.25 2.40 2.50 3.30 2.50 2.50 3.20 2.50
16 244 1 1 2008/2009 34 2009-05-16 00:00:00 493106 9987 8635 0 ... 1.45 6.50 4.00 1.44 6.50 3.75 1.45 7.00 4.00 1.40
17 245 1 1 2008/2009 34 2009-05-16 00:00:00 493107 9991 9985 0 ... 1.70 4.50 3.60 1.65 4.50 3.60 1.65 4.50 3.50 1.67
18 309 1 1 2009/2010 1 2009-08-02 00:00:00 665321 9984 9991 1 ... 2.30 2.62 3.25 2.40 2.70 3.20 2.40 2.88 3.10 2.30
19 310 1 1 2009/2010 1 2009-08-01 00:00:00 665322 9994 10000 1 ... 3.30 2.00 3.25 3.40 2.10 3.30 3.10 2.25 3.20 2.80
20 311 1 1 2009/2010 1 2009-08-01 00:00:00 665323 8571 8635 0 ... 1.57 5.50 3.60 1.57 5.25 3.50 1.60 5.50 3.60 1.53
21 314 1 1 2009/2010 10 2009-10-04 00:00:00 665411 8342 8635 4 ... 2.75 2.40 3.25 2.62 2.45 3.20 2.70 2.38 3.20 2.62
22 318 1 1 2009/2010 10 2009-10-03 00:00:00 665417 8203 9994 2 ... 4.00 1.85 3.30 3.75 1.95 3.25 3.75 1.91 3.25 3.60
23 320 1 1 2009/2010 10 2009-10-03 00:00:00 665421 9993 10001 3 ... 4.50 1.73 3.40 4.33 1.75 3.40 4.50 1.75 3.30 4.20
24 321 1 1 2009/2010 11 2009-10-18 00:00:00 665425 8342 9991 1 ... 4.75 1.70 3.50 4.00 1.60 3.50 5.25 1.67 3.40 4.75
25 323 1 1 2009/2010 11 2009-10-18 00:00:00 665427 9987 8203 1 ... 4.33 1.75 3.40 4.33 1.83 3.30 4.00 1.83 3.30 3.80
26 324 1 1 2009/2010 11 2009-10-17 00:00:00 665429 10000 9993 4 ... 3.60 1.90 3.30 3.60 2.05 3.25 3.30 2.00 3.25 3.30
27 325 1 1 2009/2010 11 2009-10-17 00:00:00 665430 9994 9984 1 ... 3.40 2.00 3.25 3.40 2.10 3.20 3.20 2.05 3.25 3.20
28 327 1 1 2009/2010 11 2009-10-17 00:00:00 665435 10001 9985 2 ... 1.67 5.50 3.40 1.60 5.00 3.40 1.65 4.75 3.40 1.67
29 328 1 1 2009/2010 12 2009-10-24 00:00:00 665438 9985 10000 1 ... 6.50 1.44 3.75 6.50 1.40 4.00 7.50 1.44 3.75 6.50
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19570 24492 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030140 9783 8558 3 ... NaN 2.10 3.40 3.90 NaN NaN NaN NaN NaN NaN
19571 24493 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030141 9869 8603 1 ... NaN 2.30 3.30 3.40 NaN NaN NaN NaN NaN NaN
19572 24494 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030142 8302 8634 2 ... NaN 4.10 3.90 1.91 NaN NaN NaN NaN NaN NaN
19573 24496 21484 21484 2015/2016 7 2015-10-02 00:00:00 2030144 9910 8305 0 ... NaN 1.50 4.50 7.50 NaN NaN NaN NaN NaN NaN
19574 24497 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030145 8581 10205 1 ... NaN 4.40 3.60 1.93 NaN NaN NaN NaN NaN NaN
19575 24498 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030146 9906 8633 1 ... NaN 3.13 3.40 2.45 NaN NaN NaN NaN NaN NaN
19576 24499 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030147 9864 8560 3 ... NaN 2.40 3.25 3.30 NaN NaN NaN NaN NaN NaN
19577 24500 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030148 8315 10267 3 ... NaN 2.38 3.40 3.25 NaN NaN NaN NaN NaN NaN
19578 24501 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030149 7878 9783 1 ... NaN 2.63 3.25 3.00 NaN NaN NaN NaN NaN NaN
19579 24502 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030150 8558 9869 1 ... NaN 2.00 3.40 4.30 NaN NaN NaN NaN NaN NaN
19580 24503 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030151 8370 8603 0 ... NaN 2.20 3.80 3.30 NaN NaN NaN NaN NaN NaN
19581 24504 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030152 8634 8370 5 ... NaN 1.15 9.50 19.00 NaN NaN NaN NaN NaN NaN
19582 24505 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030153 8372 8302 1 ... NaN 3.60 3.50 2.20 NaN NaN NaN NaN NaN NaN
19583 24507 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030155 10205 9910 1 ... NaN 2.05 3.70 3.75 NaN NaN NaN NaN NaN NaN
19584 24508 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030156 8633 8581 3 ... NaN 1.08 13.00 34.00 NaN NaN NaN NaN NaN NaN
19585 24509 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030157 8560 9906 0 ... NaN 4.30 3.30 2.05 NaN NaN NaN NaN NaN NaN
19586 24510 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030158 10267 9864 3 ... NaN 1.62 4.10 6.25 NaN NaN NaN NaN NaN NaN
19587 24511 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030159 9783 8315 2 ... NaN 2.70 3.25 2.88 NaN NaN NaN NaN NaN NaN
19588 24512 21484 21484 2015/2016 8 2015-10-19 00:00:00 2030160 9869 7878 3 ... NaN 2.00 3.40 4.33 NaN NaN NaN NaN NaN NaN
19589 24513 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030161 8603 8558 1 ... NaN 2.05 3.50 4.00 NaN NaN NaN NaN NaN NaN
19590 24514 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030162 8634 8372 3 ... NaN 1.11 11.00 31.00 NaN NaN NaN NaN NaN NaN
19591 24515 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030163 8302 8305 5 ... NaN 1.50 4.60 7.50 NaN NaN NaN NaN NaN NaN
19592 24516 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030164 8306 10205 0 ... NaN 3.60 3.40 2.20 NaN NaN NaN NaN NaN NaN
19593 24517 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030165 9910 8633 1 ... NaN 3.90 4.00 1.93 NaN NaN NaN NaN NaN NaN
19594 24518 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030166 8581 8560 0 ... NaN 2.75 3.13 2.90 NaN NaN NaN NaN NaN NaN
19595 24519 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030167 9906 10267 2 ... NaN 1.57 4.00 7.00 NaN NaN NaN NaN NaN NaN
19596 24520 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030168 9864 9783 2 ... NaN 2.30 3.40 3.40 NaN NaN NaN NaN NaN NaN
19597 24521 21484 21484 2015/2016 9 2015-10-26 00:00:00 2030169 8315 9869 3 ... NaN 1.55 4.20 7.00 NaN NaN NaN NaN NaN NaN
19598 24522 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030170 7878 8603 1 ... NaN 2.30 3.40 3.30 NaN NaN NaN NaN NaN NaN
19599 24523 21484 21484 2015/2016 9 2015-10-23 00:00:00 2030171 8370 8558 3 ... NaN 2.20 3.60 3.50 NaN NaN NaN NaN NaN NaN

19600 rows × 115 columns

Como podemos observar, temos 19600 jogos com informação sobre a posição dos jogadores e suas informações. Entretanto precisamos refinar as features.


In [4]:
drop = matches.columns.values[-27:-1]
print drop 
#Removing other betting houses odds
matches2 = matches.drop(drop,1)
matches2= matches2.drop('BSA',1)
matches2


['BWH' 'BWD' 'BWA' 'IWH' 'IWD' 'IWA' 'LBH' 'LBD' 'LBA' 'PSH' 'PSD' 'PSA'
 'WHH' 'WHD' 'WHA' 'SJH' 'SJD' 'SJA' 'VCH' 'VCD' 'VCA' 'GBH' 'GBD' 'GBA'
 'BSH' 'BSD']
Out[4]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... shoton shotoff foulcommit card cross corner possession B365H B365D B365A
0 146 1 1 2008/2009 24 2009-02-27 00:00:00 493017 8203 9987 2 ... None None None None None None None 3.00 3.40 2.30
1 154 1 1 2008/2009 25 2009-03-08 00:00:00 493025 9984 8342 1 ... None None None None None None None 2.80 3.20 2.37
2 156 1 1 2008/2009 25 2009-03-07 00:00:00 493027 8635 10000 2 ... None None None None None None None 1.40 4.50 8.00
3 163 1 1 2008/2009 26 2009-03-13 00:00:00 493034 8203 8635 2 ... None None None None None None None 4.50 3.60 1.75
4 169 1 1 2008/2009 26 2009-03-14 00:00:00 493040 10000 9999 0 ... None None None None None None None 1.75 3.50 4.75
5 174 1 1 2008/2009 27 2009-03-22 00:00:00 493045 9991 10000 1 ... None None None None None None None 1.65 3.60 4.75
6 177 1 1 2008/2009 27 2009-03-21 00:00:00 493048 9999 8203 1 ... None None None None None None None 2.20 3.30 3.00
7 190 1 1 2008/2009 29 2009-04-12 00:00:00 493061 8635 8342 1 ... None None None None None None None 1.67 3.50 5.00
8 191 1 1 2008/2009 29 2009-04-10 00:00:00 493062 9999 9987 1 ... None None None None None None None 3.20 3.25 2.25
9 220 1 1 2008/2009 31 2009-04-26 00:00:00 493082 9999 9991 1 ... None None None None None None None 3.20 3.30 2.10
10 227 1 1 2008/2009 32 2009-05-02 00:00:00 493089 10000 9985 0 ... None None None None None None None 5.50 3.60 1.57
11 230 1 1 2008/2009 32 2009-05-02 00:00:00 493092 9991 9984 2 ... None None None None None None None 1.36 4.33 7.50
12 232 1 1 2008/2009 32 2009-05-02 00:00:00 493094 10001 9999 1 ... None None None None None None None 1.57 3.60 5.50
13 235 1 1 2008/2009 33 2009-05-09 00:00:00 493097 9985 8342 2 ... None None None None None None None 1.44 4.33 7.00
14 241 1 1 2008/2009 33 2009-05-09 00:00:00 493103 8635 9999 3 ... None None None None None None None 1.14 7.50 19.00
15 243 1 1 2008/2009 33 2009-05-09 00:00:00 493105 9984 10001 2 ... None None None None None None None 2.62 3.30 2.62
16 244 1 1 2008/2009 34 2009-05-16 00:00:00 493106 9987 8635 0 ... None None None None None None None 8.00 4.00 1.40
17 245 1 1 2008/2009 34 2009-05-16 00:00:00 493107 9991 9985 0 ... None None None None None None None 4.33 3.80 1.67
18 309 1 1 2009/2010 1 2009-08-02 00:00:00 665321 9984 9991 1 ... None None None None None None None 2.88 3.30 2.25
19 310 1 1 2009/2010 1 2009-08-01 00:00:00 665322 9994 10000 1 ... None None None None None None None 2.30 3.30 2.88
20 311 1 1 2009/2010 1 2009-08-01 00:00:00 665323 8571 8635 0 ... None None None None None None None 5.75 3.60 1.57
21 314 1 1 2009/2010 10 2009-10-04 00:00:00 665411 8342 8635 4 ... None None None None None None None 2.50 3.25 2.60
22 318 1 1 2009/2010 10 2009-10-03 00:00:00 665417 8203 9994 2 ... None None None None None None None 1.91 3.30 4.20
23 320 1 1 2009/2010 10 2009-10-03 00:00:00 665421 9993 10001 3 ... None None None None None None None 1.73 3.50 5.00
24 321 1 1 2009/2010 11 2009-10-18 00:00:00 665425 8342 9991 1 ... None None None None None None None 1.70 3.40 4.75
25 323 1 1 2009/2010 11 2009-10-18 00:00:00 665427 9987 8203 1 ... None None None None None None None 1.85 3.30 4.00
26 324 1 1 2009/2010 11 2009-10-17 00:00:00 665429 10000 9993 4 ... None None None None None None None 1.91 3.30 3.80
27 325 1 1 2009/2010 11 2009-10-17 00:00:00 665430 9994 9984 1 ... None None None None None None None 2.10 3.20 3.30
28 327 1 1 2009/2010 11 2009-10-17 00:00:00 665435 10001 9985 2 ... None None None None None None None 5.50 3.50 1.60
29 328 1 1 2009/2010 12 2009-10-24 00:00:00 665438 9985 10000 1 ... None None None None None None None 1.50 4.00 7.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19570 24492 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030140 9783 8558 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>47</comment><stats... 2.10 3.30 3.75
19571 24493 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030141 9869 8603 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>61</comment><stats... 2.30 3.20 3.30
19572 24494 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030142 8302 8634 2 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>44</comment><stats... 4.00 3.60 1.91
19573 24496 21484 21484 2015/2016 7 2015-10-02 00:00:00 2030144 9910 8305 0 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>56</comment><stats... 1.44 4.50 7.50
19574 24497 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030145 8581 10205 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>43</comment><stats... 4.33 3.40 1.91
19575 24498 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030146 9906 8633 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>44</comment><stats... 3.20 3.30 2.30
19576 24499 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030147 9864 8560 3 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>51</comment><stats... 2.38 3.10 3.30
19577 24500 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030148 8315 10267 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>57</comment><stats... 2.30 3.30 3.20
19578 24501 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030149 7878 9783 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>50</comment><stats... 2.60 3.20 2.80
19579 24502 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030150 8558 9869 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><event_incident_typefk>123</even... <corner><value><stats><corners>1</corners></st... <possession><value><comment>42</comment><stats... 2.00 3.30 4.00
19580 24503 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030151 8370 8603 0 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>60</comment><stats... 2.20 3.50 3.20
19581 24504 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030152 8634 8370 5 ... <shoton><value><event_incident_typefk>139</eve... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>55</comment><stats... 1.14 9.00 15.00
19582 24505 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030153 8372 8302 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><event_incident_typefk>123</even... <corner><value><stats><corners>1</corners></st... <possession><value><comment>42</comment><stats... 3.60 3.40 2.10
19583 24507 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030155 10205 9910 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>46</comment><stats... 2.00 3.50 3.75
19584 24508 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030156 8633 8581 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>57</comment><stats... 1.08 12.00 23.00
19585 24509 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030157 8560 9906 0 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>45</comment><stats... 4.00 3.20 2.05
19586 24510 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030158 10267 9864 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>62</comment><stats... 1.62 3.75 6.00
19587 24511 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030159 9783 8315 2 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>48</comment><stats... 2.70 3.10 2.80
19588 24512 21484 21484 2015/2016 8 2015-10-19 00:00:00 2030160 9869 7878 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>61</comment><stats... 2.00 3.20 4.20
19589 24513 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030161 8603 8558 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>47</comment><stats... 2.00 3.30 4.00
19590 24514 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030162 8634 8372 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>66</comment><stats... 1.11 10.00 19.00
19591 24515 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030163 8302 8305 5 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>56</comment><stats... 1.44 4.33 8.00
19592 24516 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030164 8306 10205 0 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>59</comment><stats... 3.50 3.25 2.20
19593 24517 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030165 9910 8633 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>46</comment><stats... 3.80 3.80 1.91
19594 24518 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030166 8581 8560 0 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>51</comment><stats... 2.63 3.20 2.80
19595 24519 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030167 9906 10267 2 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>49</comment><stats... 1.57 3.80 6.50
19596 24520 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030168 9864 9783 2 ... <shoton><value><event_incident_typefk>876</eve... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>53</comment><stats... 2.25 3.25 3.40
19597 24521 21484 21484 2015/2016 9 2015-10-26 00:00:00 2030169 8315 9869 3 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>49</comment><stats... 1.53 4.00 7.00
19598 24522 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030170 7878 8603 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>50</comment><stats... 2.30 3.25 3.25
19599 24523 21484 21484 2015/2016 9 2015-10-23 00:00:00 2030171 8370 8558 3 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>59</comment><stats... 2.20 3.40 3.20

19600 rows × 88 columns


In [6]:
#Raw features
matches2.columns.values


Out[6]:
array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'goal', 'shoton', 'shotoff', 'foulcommit', 'card',
       'cross', 'corner', 'possession', 'B365H', 'B365D', 'B365A'], dtype=object)

Os dados presente nos campos goal, shoton, shotoff, foulcommit, card, cross, corner e possession estão no formato XML. Além disso, estes dados estao presentes em poucos jogos.


In [19]:
# 'shoton', 'shotoff','foulcommit', 'card','cross', 'corner', 'possession'
matches3 = matches2[matches2['goal'].notnull()]
matches3['possession'][1192]


Out[19]:
u'<possession><value><comment>56</comment><event_incident_typefk>352</event_incident_typefk><elapsed>25</elapsed><subtype>possession</subtype><sortorder>1</sortorder><awaypos>44</awaypos><homepos>56</homepos><n>68</n><type>special</type><id>379029</id></value><value><comment>54</comment><elapsed_plus>1</elapsed_plus><event_incident_typefk>352</event_incident_typefk><elapsed>45</elapsed><subtype>possession</subtype><sortorder>4</sortorder><awaypos>46</awaypos><homepos>54</homepos><n>117</n><type>special</type><id>379251</id></value><value><comment>54</comment><event_incident_typefk>352</event_incident_typefk><elapsed>70</elapsed><subtype>possession</subtype><sortorder>0</sortorder><awaypos>46</awaypos><homepos>54</homepos><n>190</n><type>special</type><id>379443</id></value><value><comment>55</comment><elapsed_plus>5</elapsed_plus><event_incident_typefk>352</event_incident_typefk><elapsed>90</elapsed><subtype>possession</subtype><sortorder>1</sortorder><awaypos>45</awaypos><homepos>55</homepos><n>252</n><type>special</type><id>379575</id></value></possession>'

In [ ]: