In [35]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import numpy as np
from numpy import random

In [36]:
# Path to the SQLite database file, relative to the notebook's working directory.
database = 'input/database.sqlite'
# Shared connection; the query cells below pass it to pd.read_sql.
conn = sqlite3.connect(database)

In [37]:
# Load only the matches that have every field this analysis relies on:
#   * X/Y pitch coordinates for all 11 home and 11 away players,
#   * the final score (home_team_goal / away_team_goal),
#   * the ids of all 11 home and 11 away players,
#   * the three B365 columns (B365H, B365D, B365A).
#
# Column names must be written bare (or in double quotes). In SQLite a
# single-quoted token such as 'home_player_Y1' is a string literal, so
# `'home_player_Y1' is not null` is always true and filters nothing.
query = """select * from match where home_player_X1 is not null and home_player_Y1 is not null
and  home_player_X2 is not null and home_player_Y2 is not null
and  home_player_X3 is not null and home_player_Y3 is not null
and  home_player_X4 is not null and home_player_Y4 is not null
and  home_player_X5 is not null and home_player_Y5 is not null
and  home_player_X6 is not null and home_player_Y6 is not null
and  home_player_X7 is not null and home_player_Y7 is not null
and  home_player_X8 is not null and home_player_Y8 is not null
and  home_player_X9 is not null and home_player_Y9 is not null
and  home_player_X10 is not null and home_player_Y10 is not null
and  home_player_X11 is not null and home_player_Y11 is not null
and  away_player_X1 is not null and away_player_Y1 is not null
and  away_player_X2 is not null and away_player_Y2 is not null
and  away_player_X3 is not null and away_player_Y3 is not null
and  away_player_X4 is not null and away_player_Y4 is not null
and  away_player_X5 is not null and away_player_Y5 is not null
and  away_player_X6 is not null and away_player_Y6 is not null
and  away_player_X7 is not null and away_player_Y7 is not null
and  away_player_X8 is not null and away_player_Y8 is not null
and  away_player_X9 is not null and away_player_Y9 is not null
and  away_player_X10 is not null and away_player_Y10 is not null
and  away_player_X11 is not null and away_player_Y11 is not null
and home_team_goal is not null and away_team_goal is not null
and home_player_1 is not null
and home_player_2 is not null
and home_player_3 is not null
and home_player_4 is not null
and home_player_5 is not null
and home_player_6 is not null
and home_player_7 is not null
and home_player_8 is not null
and home_player_9 is not null
and home_player_10 is not null
and home_player_11 is not null
and away_player_1 is not null
and away_player_2 is not null
and away_player_3 is not null
and away_player_4 is not null
and away_player_5 is not null
and away_player_6 is not null
and away_player_7 is not null
and away_player_8 is not null
and away_player_9 is not null
and away_player_10 is not null
and away_player_11 is not null
and  B365H is not null and B365D is not null and B365A is not null;"""
matches = pd.read_sql(query, conn)
matches


Out[37]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... SJA VCH VCD VCA GBH GBD GBA BSH BSD BSA
0 146 1 1 2008/2009 24 2009-02-27 00:00:00 493017 8203 9987 2 ... 2.30 2.65 3.25 2.35 2.90 3.25 2.30 2.80 3.20 2.25
1 154 1 1 2008/2009 25 2009-03-08 00:00:00 493025 9984 8342 1 ... 2.25 2.65 3.20 2.35 2.90 3.20 2.30 2.62 3.20 2.38
2 156 1 1 2008/2009 25 2009-03-07 00:00:00 493027 8635 10000 2 ... 8.50 1.30 4.35 8.00 1.35 4.33 8.50 1.36 4.20 7.00
3 163 1 1 2008/2009 26 2009-03-13 00:00:00 493034 8203 8635 2 ... 1.73 4.35 3.30 1.75 4.50 3.40 1.75 4.20 3.30 1.75
4 169 1 1 2008/2009 26 2009-03-14 00:00:00 493040 10000 9999 0 ... 5.00 1.65 3.50 4.50 1.65 3.50 5.00 1.70 3.40 4.33
5 174 1 1 2008/2009 27 2009-03-22 00:00:00 493045 9991 10000 1 ... 4.75 1.60 3.40 5.00 1.65 3.40 5.00 1.62 3.50 5.00
6 177 1 1 2008/2009 27 2009-03-21 00:00:00 493048 9999 8203 1 ... 2.75 2.25 3.25 2.80 2.10 3.25 3.15 2.25 3.20 2.75
7 190 1 1 2008/2009 29 2009-04-12 00:00:00 493061 8635 8342 1 ... 4.75 1.65 3.40 5.00 1.70 3.40 4.50 1.73 3.40 4.20
8 191 1 1 2008/2009 29 2009-04-10 00:00:00 493062 9999 9987 1 ... 2.15 2.80 3.25 2.25 3.20 3.20 2.10 2.80 3.20 2.25
9 220 1 1 2008/2009 31 2009-04-26 00:00:00 493082 9999 9991 1 ... 2.20 3.20 3.20 2.10 3.00 3.25 2.15 2.88 3.25 2.20
10 227 1 1 2008/2009 32 2009-05-02 00:00:00 493089 10000 9985 0 ... 1.62 5.00 3.50 1.60 5.25 3.50 1.60 5.00 3.60 1.57
11 230 1 1 2008/2009 32 2009-05-02 00:00:00 493092 9991 9984 2 ... 8.50 1.35 4.00 8.50 1.35 4.50 7.00 1.33 4.50 7.50
12 232 1 1 2008/2009 32 2009-05-02 00:00:00 493094 10001 9999 1 ... 5.50 1.55 3.50 5.50 1.57 3.60 5.25 1.57 3.60 5.00
13 235 1 1 2008/2009 33 2009-05-09 00:00:00 493097 9985 8342 2 ... 7.50 1.45 3.80 6.50 1.47 3.75 6.50 1.50 3.75 5.50
14 241 1 1 2008/2009 33 2009-05-09 00:00:00 493103 8635 9999 3 ... 16.00 1.14 6.50 13.00 1.15 6.50 13.00 1.17 6.00 13.00
15 243 1 1 2008/2009 33 2009-05-09 00:00:00 493105 9984 10001 2 ... 2.63 2.60 3.25 2.40 2.50 3.30 2.50 2.50 3.20 2.50
16 244 1 1 2008/2009 34 2009-05-16 00:00:00 493106 9987 8635 0 ... 1.45 6.50 4.00 1.44 6.50 3.75 1.45 7.00 4.00 1.40
17 245 1 1 2008/2009 34 2009-05-16 00:00:00 493107 9991 9985 0 ... 1.70 4.50 3.60 1.65 4.50 3.60 1.65 4.50 3.50 1.67
18 309 1 1 2009/2010 1 2009-08-02 00:00:00 665321 9984 9991 1 ... 2.30 2.62 3.25 2.40 2.70 3.20 2.40 2.88 3.10 2.30
19 310 1 1 2009/2010 1 2009-08-01 00:00:00 665322 9994 10000 1 ... 3.30 2.00 3.25 3.40 2.10 3.30 3.10 2.25 3.20 2.80
20 311 1 1 2009/2010 1 2009-08-01 00:00:00 665323 8571 8635 0 ... 1.57 5.50 3.60 1.57 5.25 3.50 1.60 5.50 3.60 1.53
21 314 1 1 2009/2010 10 2009-10-04 00:00:00 665411 8342 8635 4 ... 2.75 2.40 3.25 2.62 2.45 3.20 2.70 2.38 3.20 2.62
22 318 1 1 2009/2010 10 2009-10-03 00:00:00 665417 8203 9994 2 ... 4.00 1.85 3.30 3.75 1.95 3.25 3.75 1.91 3.25 3.60
23 320 1 1 2009/2010 10 2009-10-03 00:00:00 665421 9993 10001 3 ... 4.50 1.73 3.40 4.33 1.75 3.40 4.50 1.75 3.30 4.20
24 321 1 1 2009/2010 11 2009-10-18 00:00:00 665425 8342 9991 1 ... 4.75 1.70 3.50 4.00 1.60 3.50 5.25 1.67 3.40 4.75
25 323 1 1 2009/2010 11 2009-10-18 00:00:00 665427 9987 8203 1 ... 4.33 1.75 3.40 4.33 1.83 3.30 4.00 1.83 3.30 3.80
26 324 1 1 2009/2010 11 2009-10-17 00:00:00 665429 10000 9993 4 ... 3.60 1.90 3.30 3.60 2.05 3.25 3.30 2.00 3.25 3.30
27 325 1 1 2009/2010 11 2009-10-17 00:00:00 665430 9994 9984 1 ... 3.40 2.00 3.25 3.40 2.10 3.20 3.20 2.05 3.25 3.20
28 327 1 1 2009/2010 11 2009-10-17 00:00:00 665435 10001 9985 2 ... 1.67 5.50 3.40 1.60 5.00 3.40 1.65 4.75 3.40 1.67
29 328 1 1 2009/2010 12 2009-10-24 00:00:00 665438 9985 10000 1 ... 6.50 1.44 3.75 6.50 1.40 4.00 7.50 1.44 3.75 6.50
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19570 24492 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030140 9783 8558 3 ... NaN 2.10 3.40 3.90 NaN NaN NaN NaN NaN NaN
19571 24493 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030141 9869 8603 1 ... NaN 2.30 3.30 3.40 NaN NaN NaN NaN NaN NaN
19572 24494 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030142 8302 8634 2 ... NaN 4.10 3.90 1.91 NaN NaN NaN NaN NaN NaN
19573 24496 21484 21484 2015/2016 7 2015-10-02 00:00:00 2030144 9910 8305 0 ... NaN 1.50 4.50 7.50 NaN NaN NaN NaN NaN NaN
19574 24497 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030145 8581 10205 1 ... NaN 4.40 3.60 1.93 NaN NaN NaN NaN NaN NaN
19575 24498 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030146 9906 8633 1 ... NaN 3.13 3.40 2.45 NaN NaN NaN NaN NaN NaN
19576 24499 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030147 9864 8560 3 ... NaN 2.40 3.25 3.30 NaN NaN NaN NaN NaN NaN
19577 24500 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030148 8315 10267 3 ... NaN 2.38 3.40 3.25 NaN NaN NaN NaN NaN NaN
19578 24501 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030149 7878 9783 1 ... NaN 2.63 3.25 3.00 NaN NaN NaN NaN NaN NaN
19579 24502 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030150 8558 9869 1 ... NaN 2.00 3.40 4.30 NaN NaN NaN NaN NaN NaN
19580 24503 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030151 8370 8603 0 ... NaN 2.20 3.80 3.30 NaN NaN NaN NaN NaN NaN
19581 24504 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030152 8634 8370 5 ... NaN 1.15 9.50 19.00 NaN NaN NaN NaN NaN NaN
19582 24505 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030153 8372 8302 1 ... NaN 3.60 3.50 2.20 NaN NaN NaN NaN NaN NaN
19583 24507 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030155 10205 9910 1 ... NaN 2.05 3.70 3.75 NaN NaN NaN NaN NaN NaN
19584 24508 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030156 8633 8581 3 ... NaN 1.08 13.00 34.00 NaN NaN NaN NaN NaN NaN
19585 24509 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030157 8560 9906 0 ... NaN 4.30 3.30 2.05 NaN NaN NaN NaN NaN NaN
19586 24510 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030158 10267 9864 3 ... NaN 1.62 4.10 6.25 NaN NaN NaN NaN NaN NaN
19587 24511 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030159 9783 8315 2 ... NaN 2.70 3.25 2.88 NaN NaN NaN NaN NaN NaN
19588 24512 21484 21484 2015/2016 8 2015-10-19 00:00:00 2030160 9869 7878 3 ... NaN 2.00 3.40 4.33 NaN NaN NaN NaN NaN NaN
19589 24513 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030161 8603 8558 1 ... NaN 2.05 3.50 4.00 NaN NaN NaN NaN NaN NaN
19590 24514 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030162 8634 8372 3 ... NaN 1.11 11.00 31.00 NaN NaN NaN NaN NaN NaN
19591 24515 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030163 8302 8305 5 ... NaN 1.50 4.60 7.50 NaN NaN NaN NaN NaN NaN
19592 24516 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030164 8306 10205 0 ... NaN 3.60 3.40 2.20 NaN NaN NaN NaN NaN NaN
19593 24517 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030165 9910 8633 1 ... NaN 3.90 4.00 1.93 NaN NaN NaN NaN NaN NaN
19594 24518 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030166 8581 8560 0 ... NaN 2.75 3.13 2.90 NaN NaN NaN NaN NaN NaN
19595 24519 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030167 9906 10267 2 ... NaN 1.57 4.00 7.00 NaN NaN NaN NaN NaN NaN
19596 24520 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030168 9864 9783 2 ... NaN 2.30 3.40 3.40 NaN NaN NaN NaN NaN NaN
19597 24521 21484 21484 2015/2016 9 2015-10-26 00:00:00 2030169 8315 9869 3 ... NaN 1.55 4.20 7.00 NaN NaN NaN NaN NaN NaN
19598 24522 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030170 7878 8603 1 ... NaN 2.30 3.40 3.30 NaN NaN NaN NaN NaN NaN
19599 24523 21484 21484 2015/2016 9 2015-10-23 00:00:00 2030171 8370 8558 3 ... NaN 2.20 3.60 3.50 NaN NaN NaN NaN NaN NaN

19600 rows × 115 columns

Como podemos observar, temos 19600 jogos com informações sobre a posição dos jogadores em campo e sobre os próprios jogadores. Entretanto, precisamos refinar as features.


In [38]:
# Join every player with all of their dated stat rows (one output row per
# Player_Stats entry). Because `select *` pulls the columns of both tables,
# the result carries the `player_api_id` label twice.
query = """ select * from Player as p ,Player_Stats as s where p.player_api_id = s.player_api_id;"""
player = pd.read_sql(query, conn)

# Peek at the join key and the stat date; the key shows up twice because of
# the duplicated label noted above.
player.loc[:, ['player_api_id', 'date_stat']]


Out[38]:
player_api_id player_api_id date_stat
0 505942 505942 2016-02-18 00:00:00
1 505942 505942 2015-11-19 00:00:00
2 505942 505942 2015-09-21 00:00:00
3 505942 505942 2015-03-20 00:00:00
4 505942 505942 2007-02-22 00:00:00
5 155782 155782 2016-04-21 00:00:00
6 155782 155782 2016-04-07 00:00:00
7 155782 155782 2016-01-07 00:00:00
8 155782 155782 2015-12-24 00:00:00
9 155782 155782 2015-12-17 00:00:00
10 155782 155782 2015-10-16 00:00:00
11 155782 155782 2015-09-25 00:00:00
12 155782 155782 2015-09-21 00:00:00
13 155782 155782 2015-01-09 00:00:00
14 155782 155782 2014-12-05 00:00:00
15 155782 155782 2014-11-07 00:00:00
16 155782 155782 2014-09-18 00:00:00
17 155782 155782 2014-05-02 00:00:00
18 155782 155782 2014-04-04 00:00:00
19 155782 155782 2014-03-14 00:00:00
20 155782 155782 2013-12-13 00:00:00
21 155782 155782 2013-11-08 00:00:00
22 155782 155782 2013-10-04 00:00:00
23 155782 155782 2013-09-20 00:00:00
24 155782 155782 2013-05-03 00:00:00
25 155782 155782 2013-03-22 00:00:00
26 155782 155782 2013-03-15 00:00:00
27 155782 155782 2013-02-22 00:00:00
28 155782 155782 2013-02-15 00:00:00
29 155782 155782 2012-08-31 00:00:00
... ... ... ...
183223 108760 108760 2014-09-18 00:00:00
183224 108760 108760 2013-12-06 00:00:00
183225 108760 108760 2013-11-22 00:00:00
183226 108760 108760 2013-09-20 00:00:00
183227 108760 108760 2009-08-30 00:00:00
183228 108760 108760 2007-02-22 00:00:00
183229 39494 39494 2016-04-14 00:00:00
183230 39494 39494 2016-02-11 00:00:00
183231 39494 39494 2015-10-09 00:00:00
183232 39494 39494 2015-09-21 00:00:00
183233 39494 39494 2015-01-09 00:00:00
183234 39494 39494 2014-10-31 00:00:00
183235 39494 39494 2014-09-18 00:00:00
183236 39494 39494 2014-03-28 00:00:00
183237 39494 39494 2014-03-14 00:00:00
183238 39494 39494 2014-02-28 00:00:00
183239 39494 39494 2013-09-20 00:00:00
183240 39494 39494 2013-04-26 00:00:00
183241 39494 39494 2013-02-15 00:00:00
183242 39494 39494 2012-08-31 00:00:00
183243 39494 39494 2012-02-22 00:00:00
183244 39494 39494 2011-08-30 00:00:00
183245 39494 39494 2011-02-22 00:00:00
183246 39494 39494 2010-08-30 00:00:00
183247 39494 39494 2010-02-22 00:00:00
183248 39494 39494 2009-08-30 00:00:00
183249 39494 39494 2009-02-22 00:00:00
183250 39494 39494 2008-08-30 00:00:00
183251 39494 39494 2007-08-30 00:00:00
183252 39494 39494 2007-02-22 00:00:00

183253 rows × 3 columns


In [39]:
# The last 27 columns of `matches` hold the odds of the other betting houses
# (BWH ... BSA). `drop` collects all of them except the very last one ('BSA'),
# which is removed separately below, so `drop` keeps its original contents.
drop = matches.columns.values[-27:-1]
# Function-call form prints the same text on Python 2 and also runs on Python 3.
print(drop)
# Removing other betting houses odds.
# `axis` is passed by keyword: recent pandas no longer accepts it positionally.
matches2 = matches.drop(drop, axis=1)
matches2 = matches2.drop('BSA', axis=1)
matches2


['BWH' 'BWD' 'BWA' 'IWH' 'IWD' 'IWA' 'LBH' 'LBD' 'LBA' 'PSH' 'PSD' 'PSA'
 'WHH' 'WHD' 'WHA' 'SJH' 'SJD' 'SJA' 'VCH' 'VCD' 'VCA' 'GBH' 'GBD' 'GBA'
 'BSH' 'BSD']
Out[39]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... shoton shotoff foulcommit card cross corner possession B365H B365D B365A
0 146 1 1 2008/2009 24 2009-02-27 00:00:00 493017 8203 9987 2 ... None None None None None None None 3.00 3.40 2.30
1 154 1 1 2008/2009 25 2009-03-08 00:00:00 493025 9984 8342 1 ... None None None None None None None 2.80 3.20 2.37
2 156 1 1 2008/2009 25 2009-03-07 00:00:00 493027 8635 10000 2 ... None None None None None None None 1.40 4.50 8.00
3 163 1 1 2008/2009 26 2009-03-13 00:00:00 493034 8203 8635 2 ... None None None None None None None 4.50 3.60 1.75
4 169 1 1 2008/2009 26 2009-03-14 00:00:00 493040 10000 9999 0 ... None None None None None None None 1.75 3.50 4.75
5 174 1 1 2008/2009 27 2009-03-22 00:00:00 493045 9991 10000 1 ... None None None None None None None 1.65 3.60 4.75
6 177 1 1 2008/2009 27 2009-03-21 00:00:00 493048 9999 8203 1 ... None None None None None None None 2.20 3.30 3.00
7 190 1 1 2008/2009 29 2009-04-12 00:00:00 493061 8635 8342 1 ... None None None None None None None 1.67 3.50 5.00
8 191 1 1 2008/2009 29 2009-04-10 00:00:00 493062 9999 9987 1 ... None None None None None None None 3.20 3.25 2.25
9 220 1 1 2008/2009 31 2009-04-26 00:00:00 493082 9999 9991 1 ... None None None None None None None 3.20 3.30 2.10
10 227 1 1 2008/2009 32 2009-05-02 00:00:00 493089 10000 9985 0 ... None None None None None None None 5.50 3.60 1.57
11 230 1 1 2008/2009 32 2009-05-02 00:00:00 493092 9991 9984 2 ... None None None None None None None 1.36 4.33 7.50
12 232 1 1 2008/2009 32 2009-05-02 00:00:00 493094 10001 9999 1 ... None None None None None None None 1.57 3.60 5.50
13 235 1 1 2008/2009 33 2009-05-09 00:00:00 493097 9985 8342 2 ... None None None None None None None 1.44 4.33 7.00
14 241 1 1 2008/2009 33 2009-05-09 00:00:00 493103 8635 9999 3 ... None None None None None None None 1.14 7.50 19.00
15 243 1 1 2008/2009 33 2009-05-09 00:00:00 493105 9984 10001 2 ... None None None None None None None 2.62 3.30 2.62
16 244 1 1 2008/2009 34 2009-05-16 00:00:00 493106 9987 8635 0 ... None None None None None None None 8.00 4.00 1.40
17 245 1 1 2008/2009 34 2009-05-16 00:00:00 493107 9991 9985 0 ... None None None None None None None 4.33 3.80 1.67
18 309 1 1 2009/2010 1 2009-08-02 00:00:00 665321 9984 9991 1 ... None None None None None None None 2.88 3.30 2.25
19 310 1 1 2009/2010 1 2009-08-01 00:00:00 665322 9994 10000 1 ... None None None None None None None 2.30 3.30 2.88
20 311 1 1 2009/2010 1 2009-08-01 00:00:00 665323 8571 8635 0 ... None None None None None None None 5.75 3.60 1.57
21 314 1 1 2009/2010 10 2009-10-04 00:00:00 665411 8342 8635 4 ... None None None None None None None 2.50 3.25 2.60
22 318 1 1 2009/2010 10 2009-10-03 00:00:00 665417 8203 9994 2 ... None None None None None None None 1.91 3.30 4.20
23 320 1 1 2009/2010 10 2009-10-03 00:00:00 665421 9993 10001 3 ... None None None None None None None 1.73 3.50 5.00
24 321 1 1 2009/2010 11 2009-10-18 00:00:00 665425 8342 9991 1 ... None None None None None None None 1.70 3.40 4.75
25 323 1 1 2009/2010 11 2009-10-18 00:00:00 665427 9987 8203 1 ... None None None None None None None 1.85 3.30 4.00
26 324 1 1 2009/2010 11 2009-10-17 00:00:00 665429 10000 9993 4 ... None None None None None None None 1.91 3.30 3.80
27 325 1 1 2009/2010 11 2009-10-17 00:00:00 665430 9994 9984 1 ... None None None None None None None 2.10 3.20 3.30
28 327 1 1 2009/2010 11 2009-10-17 00:00:00 665435 10001 9985 2 ... None None None None None None None 5.50 3.50 1.60
29 328 1 1 2009/2010 12 2009-10-24 00:00:00 665438 9985 10000 1 ... None None None None None None None 1.50 4.00 7.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19570 24492 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030140 9783 8558 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>47</comment><stats... 2.10 3.30 3.75
19571 24493 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030141 9869 8603 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>61</comment><stats... 2.30 3.20 3.30
19572 24494 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030142 8302 8634 2 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>44</comment><stats... 4.00 3.60 1.91
19573 24496 21484 21484 2015/2016 7 2015-10-02 00:00:00 2030144 9910 8305 0 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>56</comment><stats... 1.44 4.50 7.50
19574 24497 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030145 8581 10205 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>43</comment><stats... 4.33 3.40 1.91
19575 24498 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030146 9906 8633 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>44</comment><stats... 3.20 3.30 2.30
19576 24499 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030147 9864 8560 3 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>51</comment><stats... 2.38 3.10 3.30
19577 24500 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030148 8315 10267 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>57</comment><stats... 2.30 3.30 3.20
19578 24501 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030149 7878 9783 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>50</comment><stats... 2.60 3.20 2.80
19579 24502 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030150 8558 9869 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><event_incident_typefk>123</even... <corner><value><stats><corners>1</corners></st... <possession><value><comment>42</comment><stats... 2.00 3.30 4.00
19580 24503 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030151 8370 8603 0 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>60</comment><stats... 2.20 3.50 3.20
19581 24504 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030152 8634 8370 5 ... <shoton><value><event_incident_typefk>139</eve... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>55</comment><stats... 1.14 9.00 15.00
19582 24505 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030153 8372 8302 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><event_incident_typefk>123</even... <corner><value><stats><corners>1</corners></st... <possession><value><comment>42</comment><stats... 3.60 3.40 2.10
19583 24507 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030155 10205 9910 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>46</comment><stats... 2.00 3.50 3.75
19584 24508 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030156 8633 8581 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>57</comment><stats... 1.08 12.00 23.00
19585 24509 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030157 8560 9906 0 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>45</comment><stats... 4.00 3.20 2.05
19586 24510 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030158 10267 9864 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>62</comment><stats... 1.62 3.75 6.00
19587 24511 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030159 9783 8315 2 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>48</comment><stats... 2.70 3.10 2.80
19588 24512 21484 21484 2015/2016 8 2015-10-19 00:00:00 2030160 9869 7878 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>61</comment><stats... 2.00 3.20 4.20
19589 24513 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030161 8603 8558 1 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>47</comment><stats... 2.00 3.30 4.00
19590 24514 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030162 8634 8372 3 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>66</comment><stats... 1.11 10.00 19.00
19591 24515 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030163 8302 8305 5 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>56</comment><stats... 1.44 4.33 8.00
19592 24516 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030164 8306 10205 0 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>59</comment><stats... 3.50 3.25 2.20
19593 24517 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030165 9910 8633 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>46</comment><stats... 3.80 3.80 1.91
19594 24518 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030166 8581 8560 0 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>51</comment><stats... 2.63 3.20 2.80
19595 24519 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030167 9906 10267 2 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>49</comment><stats... 1.57 3.80 6.50
19596 24520 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030168 9864 9783 2 ... <shoton><value><event_incident_typefk>876</eve... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>53</comment><stats... 2.25 3.25 3.40
19597 24521 21484 21484 2015/2016 9 2015-10-26 00:00:00 2030169 8315 9869 3 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>49</comment><stats... 1.53 4.00 7.00
19598 24522 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030170 7878 8603 1 ... <shoton><value><stats><shoton>1</shoton></stat... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>50</comment><stats... 2.30 3.25 3.25
19599 24523 21484 21484 2015/2016 9 2015-10-23 00:00:00 2030171 8370 8558 3 ... <shoton><value><stats><blocked>1</blocked></st... <shotoff><value><stats><shotoff>1</shotoff></s... <foulcommit><value><stats><foulscommitted>1</f... <card><value><comment>y</comment><stats><ycard... <cross><value><stats><crosses>1</crosses></sta... <corner><value><stats><corners>1</corners></st... <possession><value><comment>59</comment><stats... 2.20 3.40 3.20

19600 rows × 88 columns


In [40]:
# Raw features: list the name of every column currently in matches2.
raw_feature_names = matches2.columns.values
raw_feature_names


Out[40]:
array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'goal', 'shoton', 'shotoff', 'foulcommit', 'card',
       'cross', 'corner', 'possession', 'B365H', 'B365D', 'B365A'], dtype=object)

Os dados presentes nos campos goal, shoton, shotoff, foulcommit, card, cross, corner e possession estão no formato XML. Além disso, estes dados estão presentes em poucos jogos.


In [41]:
# Inspect one raw XML payload. The same kind of data also lives in the
# 'shoton', 'shotoff', 'foulcommit', 'card', 'cross' and 'corner' columns.
# Only rows whose 'goal' field is populated are kept in matches3.
has_goal_data = matches2['goal'].notnull()
matches3 = matches2[has_goal_data]
matches3.loc[1192, 'possession']


Out[41]:
u'<possession><value><comment>56</comment><event_incident_typefk>352</event_incident_typefk><elapsed>25</elapsed><subtype>possession</subtype><sortorder>1</sortorder><awaypos>44</awaypos><homepos>56</homepos><n>68</n><type>special</type><id>379029</id></value><value><comment>54</comment><elapsed_plus>1</elapsed_plus><event_incident_typefk>352</event_incident_typefk><elapsed>45</elapsed><subtype>possession</subtype><sortorder>4</sortorder><awaypos>46</awaypos><homepos>54</homepos><n>117</n><type>special</type><id>379251</id></value><value><comment>54</comment><event_incident_typefk>352</event_incident_typefk><elapsed>70</elapsed><subtype>possession</subtype><sortorder>0</sortorder><awaypos>46</awaypos><homepos>54</homepos><n>190</n><type>special</type><id>379443</id></value><value><comment>55</comment><elapsed_plus>5</elapsed_plus><event_incident_typefk>352</event_incident_typefk><elapsed>90</elapsed><subtype>possession</subtype><sortorder>1</sortorder><awaypos>45</awaypos><homepos>55</homepos><n>252</n><type>special</type><id>379575</id></value></possession>'

In [42]:
# Remove the XML-encoded event columns from the working frame.
# `axis` is passed by keyword: pandas >= 2.0 only accepts `labels`
# positionally, so the old `drop([...], 1)` spelling raises a TypeError there.
# NOTE: this rebinds matches2, so re-running the cell without re-running the
# load raises a KeyError (the columns are already gone).
xml_event_columns = ['goal', 'shoton', 'shotoff', 'foulcommit', 'card',
                     'cross', 'corner', 'possession']
matches2 = matches2.drop(xml_event_columns, axis=1)

In [43]:
# Display the working frame after the XML event columns were dropped
# (the rendered output reports 19600 rows x 80 columns).
matches2


Out[43]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... away_player_5 away_player_6 away_player_7 away_player_8 away_player_9 away_player_10 away_player_11 B365H B365D B365A
0 146 1 1 2008/2009 24 2009-02-27 00:00:00 493017 8203 9987 2 ... 148314 37202 43158 9307 42153 32690 38782 3.00 3.40 2.30
1 154 1 1 2008/2009 25 2009-03-08 00:00:00 493025 9984 8342 1 ... 38366 37983 39578 38336 52280 27423 38440 2.80 3.20 2.37
2 156 1 1 2008/2009 25 2009-03-07 00:00:00 493027 8635 10000 2 ... 94030 37893 37981 131531 130027 38231 131530 1.40 4.50 8.00
3 163 1 1 2008/2009 26 2009-03-13 00:00:00 493034 8203 8635 2 ... 164694 30949 38378 38383 38393 38253 37069 4.50 3.60 1.75
4 169 1 1 2008/2009 26 2009-03-14 00:00:00 493040 10000 9999 0 ... 94284 45832 26669 33671 163670 37945 33622 1.75 3.50 4.75
5 174 1 1 2008/2009 27 2009-03-22 00:00:00 493045 9991 10000 1 ... 94030 37981 131531 131530 130027 38231 75500 1.65 3.60 4.75
6 177 1 1 2008/2009 27 2009-03-21 00:00:00 493048 9999 8203 1 ... 164352 67898 37112 67959 148286 33657 26502 2.20 3.30 3.00
7 190 1 1 2008/2009 29 2009-04-12 00:00:00 493061 8635 8342 1 ... 37858 37983 27364 38336 38366 27423 38440 1.67 3.50 5.00
8 191 1 1 2008/2009 29 2009-04-10 00:00:00 493062 9999 9987 1 ... 148314 38782 43158 20445 32690 37202 38794 3.20 3.25 2.25
9 220 1 1 2008/2009 31 2009-04-26 00:00:00 493082 9999 9991 1 ... 104382 33662 38233 32760 38229 12574 46335 3.20 3.30 2.10
10 227 1 1 2008/2009 32 2009-05-02 00:00:00 493089 10000 9985 0 ... 47411 35412 39631 39591 148335 37262 148315 5.50 3.60 1.57
11 230 1 1 2008/2009 32 2009-05-02 00:00:00 493092 9991 9984 2 ... 38186 27110 37957 37909 104386 38251 12692 1.36 4.33 7.50
12 232 1 1 2008/2009 32 2009-05-02 00:00:00 493094 10001 9999 1 ... 26669 33671 94284 163670 33622 148336 94288 1.57 3.60 5.50
13 235 1 1 2008/2009 33 2009-05-09 00:00:00 493097 9985 8342 2 ... 38441 37979 27364 38336 38366 38440 34025 1.44 4.33 7.00
14 241 1 1 2008/2009 33 2009-05-09 00:00:00 493103 8635 9999 3 ... 94284 45832 26669 33671 163670 148336 33622 1.14 7.50 19.00
15 243 1 1 2008/2009 33 2009-05-09 00:00:00 493105 9984 10001 2 ... 38353 21834 39848 17883 50160 37953 30404 2.62 3.30 2.62
16 244 1 1 2008/2009 34 2009-05-16 00:00:00 493106 9987 8635 0 ... 38389 30949 38393 38253 38798 38383 37069 8.00 4.00 1.40
17 245 1 1 2008/2009 34 2009-05-16 00:00:00 493107 9991 9985 0 ... 156551 35412 26224 39631 39591 37262 38369 4.33 3.80 1.67
18 309 1 1 2009/2010 1 2009-08-02 00:00:00 665321 9984 9991 1 ... 41109 33662 37044 26916 37065 12574 46335 2.88 3.30 2.25
19 310 1 1 2009/2010 1 2009-08-01 00:00:00 665322 9994 10000 1 ... 94030 37902 37886 37981 131531 75500 130027 2.30 3.30 2.88
20 311 1 1 2009/2010 1 2009-08-01 00:00:00 665323 8571 8635 0 ... 38389 38378 38383 38393 38253 37069 46552 5.75 3.60 1.57
21 314 1 1 2009/2010 10 2009-10-04 00:00:00 665411 8342 8635 4 ... 38253 38378 38383 38393 106013 37069 12692 2.50 3.25 2.60
22 318 1 1 2009/2010 10 2009-10-03 00:00:00 665417 8203 9994 2 ... 38273 38945 38290 95609 15662 34334 69629 1.91 3.30 4.20
23 320 1 1 2009/2010 10 2009-10-03 00:00:00 665421 9993 10001 3 ... 67896 37025 38343 179058 182605 39848 25619 1.73 3.50 5.00
24 321 1 1 2009/2010 11 2009-10-18 00:00:00 665425 8342 9991 1 ... 104382 33662 26916 32760 37065 12574 46335 1.70 3.40 4.75
25 323 1 1 2009/2010 11 2009-10-18 00:00:00 665427 9987 8203 1 ... 164352 67898 37112 38969 178291 17276 148286 1.85 3.30 4.00
26 324 1 1 2009/2010 11 2009-10-17 00:00:00 665429 10000 9993 4 ... 38322 38371 36852 38784 38786 33622 178284 1.91 3.30 3.80
27 325 1 1 2009/2010 11 2009-10-17 00:00:00 665430 9994 9984 1 ... 37051 38357 37957 38782 104386 78462 38251 2.10 3.20 3.30
28 327 1 1 2009/2010 11 2009-10-17 00:00:00 665435 10001 9985 2 ... 129462 32573 37262 35412 26224 148335 38369 5.50 3.50 1.60
29 328 1 1 2009/2010 12 2009-10-24 00:00:00 665438 9985 10000 1 ... 37889 37893 37981 131531 38231 75500 104377 1.50 4.00 7.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19570 24492 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030140 9783 8558 3 ... 358138 498033 46836 107930 52004 41622 246438 2.10 3.30 3.75
19571 24493 21484 21484 2015/2016 6 2015-09-27 00:00:00 2030141 9869 8603 1 ... 445907 183350 111801 531629 37824 193226 96652 2.30 3.20 3.30
19572 24494 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030142 8302 8634 2 ... 150739 25773 154257 38818 522579 40636 19533 4.00 3.60 1.91
19573 24496 21484 21484 2015/2016 7 2015-10-02 00:00:00 2030144 9910 8305 0 ... 518240 37449 523390 240054 369800 38573 213489 1.44 4.50 7.50
19574 24497 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030145 8581 10205 1 ... 58499 474672 193978 629594 268282 37506 517941 4.33 3.40 1.91
19575 24498 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030146 9906 8633 1 ... 28467 95078 208494 31097 191315 26166 30893 3.20 3.30 2.30
19576 24499 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030147 9864 8560 3 ... 47565 205278 282678 75342 150649 99047 46289 2.38 3.10 3.30
19577 24500 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030148 8315 10267 3 ... 173427 74747 75307 93480 361770 432463 75310 2.30 3.30 3.20
19578 24501 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030149 7878 9783 1 ... 33848 279173 40668 51953 198566 495107 194660 2.60 3.20 2.80
19579 24502 21484 21484 2015/2016 7 2015-10-03 00:00:00 2030150 8558 9869 1 ... 74752 242477 202639 541557 395154 282689 474448 2.00 3.30 4.00
19580 24503 21484 21484 2015/2016 7 2015-10-04 00:00:00 2030151 8370 8603 0 ... 445907 183350 535600 111801 193226 114746 96652 2.20 3.50 3.20
19581 24504 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030152 8634 8370 5 ... 40220 427893 75004 256727 36130 210065 46808 1.14 9.00 15.00
19582 24505 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030153 8372 8302 1 ... 41092 154232 109621 213805 111237 38821 26392 3.60 3.40 2.10
19583 24507 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030155 10205 9910 1 ... 360918 246177 72623 74169 49836 174472 213486 2.00 3.50 3.75
19584 24508 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030156 8633 8581 3 ... 37484 184091 25462 364244 513086 303088 476769 1.08 12.00 23.00
19585 24509 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030157 8560 9906 0 ... 41167 432950 30871 33635 184533 51360 184138 4.00 3.20 2.05
19586 24510 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030158 10267 9864 3 ... 41470 683450 112758 432591 37450 215927 213653 1.62 3.75 6.00
19587 24511 21484 21484 2015/2016 8 2015-10-18 00:00:00 2030159 9783 8315 2 ... 154938 182224 45749 604105 33871 96619 33028 2.70 3.10 2.80
19588 24512 21484 21484 2015/2016 8 2015-10-19 00:00:00 2030160 9869 7878 3 ... 239350 170667 534484 2805 80492 43372 161291 2.00 3.20 4.20
19589 24513 21484 21484 2015/2016 8 2015-10-17 00:00:00 2030161 8603 8558 1 ... 190452 161651 88986 498033 52004 107930 41622 2.00 3.30 4.00
19590 24514 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030162 8634 8372 3 ... 246218 434404 433311 156097 187360 97762 200917 1.11 10.00 19.00
19591 24515 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030163 8302 8305 5 ... 46391 32999 37449 75192 240054 369800 183548 1.44 4.33 8.00
19592 24516 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030164 8306 10205 0 ... 160844 282281 189101 42479 562062 517941 201939 3.50 3.25 2.20
19593 24517 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030165 9910 8633 1 ... 28467 31097 208494 95078 359193 30893 281085 3.80 3.80 1.91
19594 24518 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030166 8581 8560 0 ... 80295 102622 205278 75342 33973 99047 46289 2.63 3.20 2.80
19595 24519 21484 21484 2015/2016 9 2015-10-25 00:00:00 2030167 9906 10267 2 ... 391058 75307 40148 570432 361770 193869 428947 1.57 3.80 6.50
19596 24520 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030168 9864 9783 2 ... 33848 279173 51953 40668 474680 37653 194660 2.25 3.25 3.40
19597 24521 21484 21484 2015/2016 9 2015-10-26 00:00:00 2030169 8315 9869 3 ... 239965 359191 202639 242477 541557 395154 206301 1.53 4.00 7.00
19598 24522 21484 21484 2015/2016 9 2015-10-24 00:00:00 2030170 7878 8603 1 ... 445907 37824 111801 535600 193226 114746 96652 2.30 3.25 3.25
19599 24523 21484 21484 2015/2016 9 2015-10-23 00:00:00 2030171 8370 8558 3 ... 190452 107930 161651 88986 498033 52004 41622 2.20 3.40 3.20

19600 rows × 80 columns


In [162]:
# Gather the grid coordinates of all 11 players of each side for the single
# match stored under index label 19555:
#   X / y   -> home team X and Y coordinates
#   X2 / y2 -> away team X and Y coordinates
row_label = 19555
player_slots = range(1, 12)
X = [matches2['home_player_X%d' % slot][row_label] for slot in player_slots]
y = [matches2['home_player_Y%d' % slot][row_label] for slot in player_slots]
X2 = [matches2['away_player_X%d' % slot][row_label] for slot in player_slots]
y2 = [matches2['away_player_Y%d' % slot][row_label] for slot in player_slots]

In [163]:
# Show the away coordinates as loaded, before they are mirrored below.
# Written so that it prints the same text as the Python 2 statement
# `print X2,y2` while also being valid Python 3 syntax.
print('%s %s' % (X2, y2))

# Mirror the away team's Y coordinates (24 - y) so both teams can be drawn
# facing each other inside the same 0..24 vertical range used for the axes.
# NOTE: this flips y2 in place, so the cell is not idempotent: running it a
# second time without re-running the coordinate-collection cell flips the
# away team back. The X coordinates are left untouched.
y2[:] = [(24 - y_val) for y_val in y2]

plt.axis([0, 12, 0, 24])
plt.plot(X, y, 'g^', label='home')
plt.plot(X2, y2, 'r^', label='away (Y mirrored)')
plt.title('Starting line-ups')
plt.xlabel('X grid position')
plt.ylabel('Y grid position')
plt.legend(loc='best')
plt.show()


[1, 2, 4, 6, 8, 4, 6, 3, 5, 7, 5] [1, 3, 3, 3, 3, 6, 6, 8, 8, 8, 11]

In [34]:
# Print the home coordinates, then the away coordinates, one pair per line.
# Same text as the Python 2 statements `print X,y` / `print X2,y2`.
print('%s %s' % (X, y))
print('%s %s' % (X2, y2))


[1, 1, 3, 5, 7, 9, 3, 5, 7, 4, 6] [1, 3, 3, 3, 3, 3, 7, 7, 7, 10, 10]
[1, 2, 4, 6, 8, 4, 6, 3, 5, 7, 5] [23, 21, 21, 21, 21, 18, 18, 16, 16, 16, 13]

In [192]:
import re
def def_formations(matches):
    """Build a formation label (e.g. ``'4-4-2'``) for every row of ``matches``.

    The values of each row are bucketed into four bands (``<= 3``, ``4..6``,
    ``7..9`` and ``>= 10``) and the number of values falling in each band is
    counted. The four counts are joined with ``-``; a band whose count is
    zero is left out of the label, except for the last band, whose count is
    always kept.

    Parameters
    ----------
    matches : pandas.DataFrame
        One row per match, one column per player Y coordinate.

    Returns
    -------
    list of str
        One formation label per row, in row order (empty list for an empty
        frame).
    """
    form = []
    # Iterate over the frame that was passed in. The previous version looped
    # over the global `matches3`, ignoring the argument entirely.
    for b in matches.values:
        dfs = (b <= 3).sum()
        mid1 = ((b >= 4) & (b <= 6)).sum()
        mid2 = ((b >= 7) & (b <= 9)).sum()
        atk1 = (b >= 10).sum()
        formation = "%d-%d-%d-%d" % (dfs, mid1, mid2, atk1)
        # Drop empty bands. The word boundary makes sure only a standalone
        # "0-" token is removed and not the tail of a count such as "10-".
        formation = re.sub(r'\b0-', '', formation)
        form.append(formation)
    return form

In [193]:
# Select the Y coordinates of players 2..11 for each side (player 1 is left
# out -- presumably the goalkeeper; TODO confirm) and turn every row into a
# formation label.
outfield_slots = range(2, 12)
positions_home = matches2[['home_player_Y%d' % slot for slot in outfield_slots]]
positions_away = matches2[['away_player_Y%d' % slot for slot in outfield_slots]]
formation_home = def_formations(positions_home)
formation_away = def_formations(positions_away)

In [194]:
# Attach the computed formation labels to the working frame as two new
# columns, home first and away second.
for side, labels in (('home', formation_home), ('away', formation_away)):
    matches2['formation_%s' % side] = labels

In [195]:
# Distinct formation labels per side. Only the last expression of a cell is
# rendered, so the output below shows the away formations only.
home_formations = matches2['formation_home'].unique()
away_formations = matches2['formation_away'].unique()
away_formations


Out[195]:
array(['4-4-2', '3-4-3', '3-5-2', '4-3-3', '5-3-2', '5-4-1', '3-3-3-1',
       '4-5-1', '4-1-4-1', '4-2-3-1', '3-6-1', '4-3-2-1', '4-1-3-2',
       '4-2-1-3', '4-3-1-2', '4-1-2-3', '4-2-2-2', '3-2-3-2', '5-2-2-1'], dtype=object)

In [202]:
# The raw X/Y grid coordinates are now summarised by the formation columns,
# so drop all 44 of them (home/away x Y/X x players 1..11).
# The names are generated instead of typed out by hand, and `axis` is passed
# by keyword: pandas >= 2.0 only accepts `labels` positionally, so the old
# `drop([...], 1)` spelling raises a TypeError there.
coordinate_columns = ['%s_player_%s%d' % (side, coord, slot)
                      for side in ('home', 'away')
                      for coord in ('Y', 'X')
                      for slot in range(1, 12)]
matches_with_formation = matches2.drop(coordinate_columns, axis=1)
matches_with_formation.columns.values


Out[202]:
array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_1',
       'home_player_2', 'home_player_3', 'home_player_4', 'home_player_5',
       'home_player_6', 'home_player_7', 'home_player_8', 'home_player_9',
       'home_player_10', 'home_player_11', 'away_player_1',
       'away_player_2', 'away_player_3', 'away_player_4', 'away_player_5',
       'away_player_6', 'away_player_7', 'away_player_8', 'away_player_9',
       'away_player_10', 'away_player_11', 'B365H', 'B365D', 'B365A',
       'formation_home', 'formation_away'], dtype=object)