notebook.community

Edit and run



In [1]:

    
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import numpy as np
from numpy import random
database = 'database.sqlite'
conn = sqlite3.connect(database)



In [2]:

    
query = """select * from match where home_player_X1 is not null and 'home_player_Y1' is not null 
and  home_player_X2 is not null and 'home_player_Y2' is not null 
and  home_player_X3 is not null and home_player_Y3 is not null 
and  home_player_X4 is not null and home_player_Y4 is not null 
and  home_player_X5 is not null and home_player_Y5 is not null 
and  home_player_X6 is not null and home_player_Y6 is not null 
and  home_player_X7 is not null and home_player_Y7 is not null 
and  home_player_X8 is not null and home_player_Y8 is not null 
and  home_player_X9 is not null and home_player_Y9 is not null 
and  home_player_X10 is not null and home_player_Y10 is not null 
and  home_player_X11 is not null and home_player_Y11 is not null  
and  away_player_X1 is not null and away_player_Y1 is not null 
and  away_player_X2 is not null and away_player_Y2 is not null 
and  away_player_X3 is not null and away_player_Y3 is not null 
and  away_player_X4 is not null and away_player_Y4 is not null 
and  away_player_X5 is not null and away_player_Y5 is not null 
and  away_player_X6 is not null and away_player_Y6 is not null 
and  away_player_X7 is not null and away_player_Y7 is not null 
and  away_player_X8 is not null and away_player_Y8 is not null 
and  away_player_X9 is not null and away_player_Y9 is not null 
and  away_player_X10 is not null and away_player_Y10 is not null 
and  away_player_X11 is not null and away_player_Y11 is not null 
and home_team_goal is not null and away_team_goal is not null
and home_player_1 is not null 
and home_player_2 is not null  
and home_player_3 is not null  
and home_player_4 is not null  
and home_player_5 is not null  
and home_player_6 is not null  
and home_player_7 is not null  
and home_player_8 is not null 
and home_player_9 is not null  
and home_player_10 is not null  
and home_player_11 is not null  
and away_player_1 is not null  
and away_player_2 is not null  
and away_player_3 is not null  
and away_player_4 is not null  
and away_player_5 is not null  
and away_player_6 is not null  
and away_player_7 is not null  
and away_player_8 is not null  
and away_player_9 is not null  
and away_player_10 is not null  
and away_player_11 is not null  
and  B365H is not null and B365D is not null and B365A is not null;"""
matches = pd.read_sql(query, conn)
matches









    Out[2]:






  
    
      
      id
      country_id
      league_id
      season
      stage
      date
      match_api_id
      home_team_api_id
      away_team_api_id
      home_team_goal
      ...
      SJA
      VCH
      VCD
      VCA
      GBH
      GBD
      GBA
      BSH
      BSD
      BSA
    
  
  
    
      0
      146
      1
      1
      2008/2009
      24
      2009-02-27 00:00:00
      493017
      8203
      9987
      2
      ...
      2.30
      2.65
      3.25
      2.35
      2.90
      3.25
      2.30
      2.80
      3.20
      2.25
    
    
      1
      154
      1
      1
      2008/2009
      25
      2009-03-08 00:00:00
      493025
      9984
      8342
      1
      ...
      2.25
      2.65
      3.20
      2.35
      2.90
      3.20
      2.30
      2.62
      3.20
      2.38
    
    
      2
      156
      1
      1
      2008/2009
      25
      2009-03-07 00:00:00
      493027
      8635
      10000
      2
      ...
      8.50
      1.30
      4.35
      8.00
      1.35
      4.33
      8.50
      1.36
      4.20
      7.00
    
    
      3
      163
      1
      1
      2008/2009
      26
      2009-03-13 00:00:00
      493034
      8203
      8635
      2
      ...
      1.73
      4.35
      3.30
      1.75
      4.50
      3.40
      1.75
      4.20
      3.30
      1.75
    
    
      4
      169
      1
      1
      2008/2009
      26
      2009-03-14 00:00:00
      493040
      10000
      9999
      0
      ...
      5.00
      1.65
      3.50
      4.50
      1.65
      3.50
      5.00
      1.70
      3.40
      4.33
    
    
      5
      174
      1
      1
      2008/2009
      27
      2009-03-22 00:00:00
      493045
      9991
      10000
      1
      ...
      4.75
      1.60
      3.40
      5.00
      1.65
      3.40
      5.00
      1.62
      3.50
      5.00
    
    
      6
      177
      1
      1
      2008/2009
      27
      2009-03-21 00:00:00
      493048
      9999
      8203
      1
      ...
      2.75
      2.25
      3.25
      2.80
      2.10
      3.25
      3.15
      2.25
      3.20
      2.75
    
    
      7
      190
      1
      1
      2008/2009
      29
      2009-04-12 00:00:00
      493061
      8635
      8342
      1
      ...
      4.75
      1.65
      3.40
      5.00
      1.70
      3.40
      4.50
      1.73
      3.40
      4.20
    
    
      8
      191
      1
      1
      2008/2009
      29
      2009-04-10 00:00:00
      493062
      9999
      9987
      1
      ...
      2.15
      2.80
      3.25
      2.25
      3.20
      3.20
      2.10
      2.80
      3.20
      2.25
    
    
      9
      220
      1
      1
      2008/2009
      31
      2009-04-26 00:00:00
      493082
      9999
      9991
      1
      ...
      2.20
      3.20
      3.20
      2.10
      3.00
      3.25
      2.15
      2.88
      3.25
      2.20
    
    
      10
      227
      1
      1
      2008/2009
      32
      2009-05-02 00:00:00
      493089
      10000
      9985
      0
      ...
      1.62
      5.00
      3.50
      1.60
      5.25
      3.50
      1.60
      5.00
      3.60
      1.57
    
    
      11
      230
      1
      1
      2008/2009
      32
      2009-05-02 00:00:00
      493092
      9991
      9984
      2
      ...
      8.50
      1.35
      4.00
      8.50
      1.35
      4.50
      7.00
      1.33
      4.50
      7.50
    
    
      12
      232
      1
      1
      2008/2009
      32
      2009-05-02 00:00:00
      493094
      10001
      9999
      1
      ...
      5.50
      1.55
      3.50
      5.50
      1.57
      3.60
      5.25
      1.57
      3.60
      5.00
    
    
      13
      235
      1
      1
      2008/2009
      33
      2009-05-09 00:00:00
      493097
      9985
      8342
      2
      ...
      7.50
      1.45
      3.80
      6.50
      1.47
      3.75
      6.50
      1.50
      3.75
      5.50
    
    
      14
      241
      1
      1
      2008/2009
      33
      2009-05-09 00:00:00
      493103
      8635
      9999
      3
      ...
      16.00
      1.14
      6.50
      13.00
      1.15
      6.50
      13.00
      1.17
      6.00
      13.00
    
    
      15
      243
      1
      1
      2008/2009
      33
      2009-05-09 00:00:00
      493105
      9984
      10001
      2
      ...
      2.63
      2.60
      3.25
      2.40
      2.50
      3.30
      2.50
      2.50
      3.20
      2.50
    
    
      16
      244
      1
      1
      2008/2009
      34
      2009-05-16 00:00:00
      493106
      9987
      8635
      0
      ...
      1.45
      6.50
      4.00
      1.44
      6.50
      3.75
      1.45
      7.00
      4.00
      1.40
    
    
      17
      245
      1
      1
      2008/2009
      34
      2009-05-16 00:00:00
      493107
      9991
      9985
      0
      ...
      1.70
      4.50
      3.60
      1.65
      4.50
      3.60
      1.65
      4.50
      3.50
      1.67
    
    
      18
      309
      1
      1
      2009/2010
      1
      2009-08-02 00:00:00
      665321
      9984
      9991
      1
      ...
      2.30
      2.62
      3.25
      2.40
      2.70
      3.20
      2.40
      2.88
      3.10
      2.30
    
    
      19
      310
      1
      1
      2009/2010
      1
      2009-08-01 00:00:00
      665322
      9994
      10000
      1
      ...
      3.30
      2.00
      3.25
      3.40
      2.10
      3.30
      3.10
      2.25
      3.20
      2.80
    
    
      20
      311
      1
      1
      2009/2010
      1
      2009-08-01 00:00:00
      665323
      8571
      8635
      0
      ...
      1.57
      5.50
      3.60
      1.57
      5.25
      3.50
      1.60
      5.50
      3.60
      1.53
    
    
      21
      314
      1
      1
      2009/2010
      10
      2009-10-04 00:00:00
      665411
      8342
      8635
      4
      ...
      2.75
      2.40
      3.25
      2.62
      2.45
      3.20
      2.70
      2.38
      3.20
      2.62
    
    
      22
      318
      1
      1
      2009/2010
      10
      2009-10-03 00:00:00
      665417
      8203
      9994
      2
      ...
      4.00
      1.85
      3.30
      3.75
      1.95
      3.25
      3.75
      1.91
      3.25
      3.60
    
    
      23
      320
      1
      1
      2009/2010
      10
      2009-10-03 00:00:00
      665421
      9993
      10001
      3
      ...
      4.50
      1.73
      3.40
      4.33
      1.75
      3.40
      4.50
      1.75
      3.30
      4.20
    
    
      24
      321
      1
      1
      2009/2010
      11
      2009-10-18 00:00:00
      665425
      8342
      9991
      1
      ...
      4.75
      1.70
      3.50
      4.00
      1.60
      3.50
      5.25
      1.67
      3.40
      4.75
    
    
      25
      323
      1
      1
      2009/2010
      11
      2009-10-18 00:00:00
      665427
      9987
      8203
      1
      ...
      4.33
      1.75
      3.40
      4.33
      1.83
      3.30
      4.00
      1.83
      3.30
      3.80
    
    
      26
      324
      1
      1
      2009/2010
      11
      2009-10-17 00:00:00
      665429
      10000
      9993
      4
      ...
      3.60
      1.90
      3.30
      3.60
      2.05
      3.25
      3.30
      2.00
      3.25
      3.30
    
    
      27
      325
      1
      1
      2009/2010
      11
      2009-10-17 00:00:00
      665430
      9994
      9984
      1
      ...
      3.40
      2.00
      3.25
      3.40
      2.10
      3.20
      3.20
      2.05
      3.25
      3.20
    
    
      28
      327
      1
      1
      2009/2010
      11
      2009-10-17 00:00:00
      665435
      10001
      9985
      2
      ...
      1.67
      5.50
      3.40
      1.60
      5.00
      3.40
      1.65
      4.75
      3.40
      1.67
    
    
      29
      328
      1
      1
      2009/2010
      12
      2009-10-24 00:00:00
      665438
      9985
      10000
      1
      ...
      6.50
      1.44
      3.75
      6.50
      1.40
      4.00
      7.50
      1.44
      3.75
      6.50
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      19570
      24492
      21484
      21484
      2015/2016
      6
      2015-09-27 00:00:00
      2030140
      9783
      8558
      3
      ...
      NaN
      2.10
      3.40
      3.90
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19571
      24493
      21484
      21484
      2015/2016
      6
      2015-09-27 00:00:00
      2030141
      9869
      8603
      1
      ...
      NaN
      2.30
      3.30
      3.40
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19572
      24494
      21484
      21484
      2015/2016
      7
      2015-10-03 00:00:00
      2030142
      8302
      8634
      2
      ...
      NaN
      4.10
      3.90
      1.91
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19573
      24496
      21484
      21484
      2015/2016
      7
      2015-10-02 00:00:00
      2030144
      9910
      8305
      0
      ...
      NaN
      1.50
      4.50
      7.50
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19574
      24497
      21484
      21484
      2015/2016
      7
      2015-10-04 00:00:00
      2030145
      8581
      10205
      1
      ...
      NaN
      4.40
      3.60
      1.93
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19575
      24498
      21484
      21484
      2015/2016
      7
      2015-10-04 00:00:00
      2030146
      9906
      8633
      1
      ...
      NaN
      3.13
      3.40
      2.45
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19576
      24499
      21484
      21484
      2015/2016
      7
      2015-10-03 00:00:00
      2030147
      9864
      8560
      3
      ...
      NaN
      2.40
      3.25
      3.30
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19577
      24500
      21484
      21484
      2015/2016
      7
      2015-10-04 00:00:00
      2030148
      8315
      10267
      3
      ...
      NaN
      2.38
      3.40
      3.25
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19578
      24501
      21484
      21484
      2015/2016
      7
      2015-10-03 00:00:00
      2030149
      7878
      9783
      1
      ...
      NaN
      2.63
      3.25
      3.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19579
      24502
      21484
      21484
      2015/2016
      7
      2015-10-03 00:00:00
      2030150
      8558
      9869
      1
      ...
      NaN
      2.00
      3.40
      4.30
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19580
      24503
      21484
      21484
      2015/2016
      7
      2015-10-04 00:00:00
      2030151
      8370
      8603
      0
      ...
      NaN
      2.20
      3.80
      3.30
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19581
      24504
      21484
      21484
      2015/2016
      8
      2015-10-17 00:00:00
      2030152
      8634
      8370
      5
      ...
      NaN
      1.15
      9.50
      19.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19582
      24505
      21484
      21484
      2015/2016
      8
      2015-10-17 00:00:00
      2030153
      8372
      8302
      1
      ...
      NaN
      3.60
      3.50
      2.20
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19583
      24507
      21484
      21484
      2015/2016
      8
      2015-10-18 00:00:00
      2030155
      10205
      9910
      1
      ...
      NaN
      2.05
      3.70
      3.75
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19584
      24508
      21484
      21484
      2015/2016
      8
      2015-10-17 00:00:00
      2030156
      8633
      8581
      3
      ...
      NaN
      1.08
      13.00
      34.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19585
      24509
      21484
      21484
      2015/2016
      8
      2015-10-18 00:00:00
      2030157
      8560
      9906
      0
      ...
      NaN
      4.30
      3.30
      2.05
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19586
      24510
      21484
      21484
      2015/2016
      8
      2015-10-17 00:00:00
      2030158
      10267
      9864
      3
      ...
      NaN
      1.62
      4.10
      6.25
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19587
      24511
      21484
      21484
      2015/2016
      8
      2015-10-18 00:00:00
      2030159
      9783
      8315
      2
      ...
      NaN
      2.70
      3.25
      2.88
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19588
      24512
      21484
      21484
      2015/2016
      8
      2015-10-19 00:00:00
      2030160
      9869
      7878
      3
      ...
      NaN
      2.00
      3.40
      4.33
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19589
      24513
      21484
      21484
      2015/2016
      8
      2015-10-17 00:00:00
      2030161
      8603
      8558
      1
      ...
      NaN
      2.05
      3.50
      4.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19590
      24514
      21484
      21484
      2015/2016
      9
      2015-10-25 00:00:00
      2030162
      8634
      8372
      3
      ...
      NaN
      1.11
      11.00
      31.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19591
      24515
      21484
      21484
      2015/2016
      9
      2015-10-24 00:00:00
      2030163
      8302
      8305
      5
      ...
      NaN
      1.50
      4.60
      7.50
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19592
      24516
      21484
      21484
      2015/2016
      9
      2015-10-25 00:00:00
      2030164
      8306
      10205
      0
      ...
      NaN
      3.60
      3.40
      2.20
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19593
      24517
      21484
      21484
      2015/2016
      9
      2015-10-24 00:00:00
      2030165
      9910
      8633
      1
      ...
      NaN
      3.90
      4.00
      1.93
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19594
      24518
      21484
      21484
      2015/2016
      9
      2015-10-25 00:00:00
      2030166
      8581
      8560
      0
      ...
      NaN
      2.75
      3.13
      2.90
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19595
      24519
      21484
      21484
      2015/2016
      9
      2015-10-25 00:00:00
      2030167
      9906
      10267
      2
      ...
      NaN
      1.57
      4.00
      7.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19596
      24520
      21484
      21484
      2015/2016
      9
      2015-10-24 00:00:00
      2030168
      9864
      9783
      2
      ...
      NaN
      2.30
      3.40
      3.40
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19597
      24521
      21484
      21484
      2015/2016
      9
      2015-10-26 00:00:00
      2030169
      8315
      9869
      3
      ...
      NaN
      1.55
      4.20
      7.00
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19598
      24522
      21484
      21484
      2015/2016
      9
      2015-10-24 00:00:00
      2030170
      7878
      8603
      1
      ...
      NaN
      2.30
      3.40
      3.30
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      19599
      24523
      21484
      21484
      2015/2016
      9
      2015-10-23 00:00:00
      2030171
      8370
      8558
      3
      ...
      NaN
      2.20
      3.60
      3.50
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

19600 rows × 115 columns



In [3]:

    
query = """ select * from Player as p ,Player_Stats as s where p.player_api_id = s.player_api_id;"""
player = pd.read_sql(query, conn)
player[['player_api_id','date_stat']]









    Out[3]:






  
    
      
      player_api_id
      player_api_id
      date_stat
    
  
  
    
      0
      505942
      505942
      2016-02-18 00:00:00
    
    
      1
      505942
      505942
      2015-11-19 00:00:00
    
    
      2
      505942
      505942
      2015-09-21 00:00:00
    
    
      3
      505942
      505942
      2015-03-20 00:00:00
    
    
      4
      505942
      505942
      2007-02-22 00:00:00
    
    
      5
      155782
      155782
      2016-04-21 00:00:00
    
    
      6
      155782
      155782
      2016-04-07 00:00:00
    
    
      7
      155782
      155782
      2016-01-07 00:00:00
    
    
      8
      155782
      155782
      2015-12-24 00:00:00
    
    
      9
      155782
      155782
      2015-12-17 00:00:00
    
    
      10
      155782
      155782
      2015-10-16 00:00:00
    
    
      11
      155782
      155782
      2015-09-25 00:00:00
    
    
      12
      155782
      155782
      2015-09-21 00:00:00
    
    
      13
      155782
      155782
      2015-01-09 00:00:00
    
    
      14
      155782
      155782
      2014-12-05 00:00:00
    
    
      15
      155782
      155782
      2014-11-07 00:00:00
    
    
      16
      155782
      155782
      2014-09-18 00:00:00
    
    
      17
      155782
      155782
      2014-05-02 00:00:00
    
    
      18
      155782
      155782
      2014-04-04 00:00:00
    
    
      19
      155782
      155782
      2014-03-14 00:00:00
    
    
      20
      155782
      155782
      2013-12-13 00:00:00
    
    
      21
      155782
      155782
      2013-11-08 00:00:00
    
    
      22
      155782
      155782
      2013-10-04 00:00:00
    
    
      23
      155782
      155782
      2013-09-20 00:00:00
    
    
      24
      155782
      155782
      2013-05-03 00:00:00
    
    
      25
      155782
      155782
      2013-03-22 00:00:00
    
    
      26
      155782
      155782
      2013-03-15 00:00:00
    
    
      27
      155782
      155782
      2013-02-22 00:00:00
    
    
      28
      155782
      155782
      2013-02-15 00:00:00
    
    
      29
      155782
      155782
      2012-08-31 00:00:00
    
    
      ...
      ...
      ...
      ...
    
    
      183223
      108760
      108760
      2014-09-18 00:00:00
    
    
      183224
      108760
      108760
      2013-12-06 00:00:00
    
    
      183225
      108760
      108760
      2013-11-22 00:00:00
    
    
      183226
      108760
      108760
      2013-09-20 00:00:00
    
    
      183227
      108760
      108760
      2009-08-30 00:00:00
    
    
      183228
      108760
      108760
      2007-02-22 00:00:00
    
    
      183229
      39494
      39494
      2016-04-14 00:00:00
    
    
      183230
      39494
      39494
      2016-02-11 00:00:00
    
    
      183231
      39494
      39494
      2015-10-09 00:00:00
    
    
      183232
      39494
      39494
      2015-09-21 00:00:00
    
    
      183233
      39494
      39494
      2015-01-09 00:00:00
    
    
      183234
      39494
      39494
      2014-10-31 00:00:00
    
    
      183235
      39494
      39494
      2014-09-18 00:00:00
    
    
      183236
      39494
      39494
      2014-03-28 00:00:00
    
    
      183237
      39494
      39494
      2014-03-14 00:00:00
    
    
      183238
      39494
      39494
      2014-02-28 00:00:00
    
    
      183239
      39494
      39494
      2013-09-20 00:00:00
    
    
      183240
      39494
      39494
      2013-04-26 00:00:00
    
    
      183241
      39494
      39494
      2013-02-15 00:00:00
    
    
      183242
      39494
      39494
      2012-08-31 00:00:00
    
    
      183243
      39494
      39494
      2012-02-22 00:00:00
    
    
      183244
      39494
      39494
      2011-08-30 00:00:00
    
    
      183245
      39494
      39494
      2011-02-22 00:00:00
    
    
      183246
      39494
      39494
      2010-08-30 00:00:00
    
    
      183247
      39494
      39494
      2010-02-22 00:00:00
    
    
      183248
      39494
      39494
      2009-08-30 00:00:00
    
    
      183249
      39494
      39494
      2009-02-22 00:00:00
    
    
      183250
      39494
      39494
      2008-08-30 00:00:00
    
    
      183251
      39494
      39494
      2007-08-30 00:00:00
    
    
      183252
      39494
      39494
      2007-02-22 00:00:00
    
  

183253 rows × 3 columns



In [4]:

    
drop = matches.columns.values[-27:-1]
print drop 
#Removing other betting houses odds
matches = matches.drop(drop,1)
matches= matches.drop('BSA',1)
#Raw features
matches.columns.values









    



['BWH' 'BWD' 'BWA' 'IWH' 'IWD' 'IWA' 'LBH' 'LBD' 'LBA' 'PSH' 'PSD' 'PSA'
 'WHH' 'WHD' 'WHA' 'SJH' 'SJD' 'SJA' 'VCH' 'VCD' 'VCA' 'GBH' 'GBD' 'GBA'
 'BSH' 'BSD']






    Out[4]:





array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'goal', 'shoton', 'shotoff', 'foulcommit', 'card',
       'cross', 'corner', 'possession', 'B365H', 'B365D', 'B365A'], dtype=object)



In [5]:

    
matches.columns.values









    Out[5]:





array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'goal', 'shoton', 'shotoff', 'foulcommit', 'card',
       'cross', 'corner', 'possession', 'B365H', 'B365D', 'B365A'], dtype=object)



In [6]:

    
matches= matches.drop(['goal', 'shoton', 'shotoff', 'foulcommit', 'card',
       'cross', 'corner', 'possession'],1)



In [7]:

    
#Transforming date column into a date type
matches['date']=pd.to_datetime(matches['date'], format='%Y-%m-%d %H:%M:%S.%f')
#
matches['date']=matches['date']-matches['date'].unique()[0]
matches['date']=matches['date'].astype('timedelta64[D]')



In [8]:

    
#matches=matches.drop(['home_player_1', 'home_player_2','home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
#       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
#       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
#       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
#       'away_player_8', 'away_player_9', 'away_player_10',
#       'away_player_11'])


#Transforming season column into categorical value
from sklearn import preprocessing 
le = preprocessing.LabelEncoder()
matches['season'] = le.fit_transform(matches['season'].astype('str'))



In [9]:

    
matches['season'].describe()









    Out[9]:





count    19600.000000
mean         3.714541
std          2.215884
min          0.000000
25%          2.000000
50%          4.000000
75%          6.000000
max          7.000000
Name: season, dtype: float64



In [10]:

    
matches.columns.values









    Out[10]:





array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'B365H', 'B365D', 'B365A'], dtype=object)



In [ ]:



In [11]:

    
import graphlab as gl
gl.canvas.set_target('ipynb')



In [12]:

    
#train_data,test_data = matches_without_ids.random_split(.9, seed=0)
#matches['H']=(matches['home_team_goal']>matches['away_team_goal']).astype(int)
#matches['A']=(matches['home_team_goal']<matches['away_team_goal']).astype(int)

def determine_home_result(match):
    if match['home_team_goal'] > match['away_team_goal']:
        return 'H'
    elif match['home_team_goal'] < match['away_team_goal']:
        return 'A'
    else:
        return 'D'
    
    
matches['Output']=matches.apply(determine_home_result, axis=1)



In [13]:

    
matches_without_ids = matches.drop(['home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11','id','home_team_goal','away_team_goal'],1)



In [14]:

    
matches_without_ids.columns.values









    Out[14]:





array(['country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_player_X1', 'home_player_X2', 'home_player_X3',
       'home_player_X4', 'home_player_X5', 'home_player_X6',
       'home_player_X7', 'home_player_X8', 'home_player_X9',
       'home_player_X10', 'home_player_X11', 'away_player_X1',
       'away_player_X2', 'away_player_X3', 'away_player_X4',
       'away_player_X5', 'away_player_X6', 'away_player_X7',
       'away_player_X8', 'away_player_X9', 'away_player_X10',
       'away_player_X11', 'home_player_Y1', 'home_player_Y2',
       'home_player_Y3', 'home_player_Y4', 'home_player_Y5',
       'home_player_Y6', 'home_player_Y7', 'home_player_Y8',
       'home_player_Y9', 'home_player_Y10', 'home_player_Y11',
       'away_player_Y1', 'away_player_Y2', 'away_player_Y3',
       'away_player_Y4', 'away_player_Y5', 'away_player_Y6',
       'away_player_Y7', 'away_player_Y8', 'away_player_Y9',
       'away_player_Y10', 'away_player_Y11', 'B365H', 'B365D', 'B365A',
       'Output'], dtype=object)



In [15]:

    
pure_data = gl.SFrame(matches_without_ids)
train_data,test_data = pure_data.random_split(.8, seed=0)
#train,valid=train_data.random_split(.8,seed=0)
folds = gl.cross_validation.KFold(train_data, 5)









    



This non-commercial license of GraphLab Create for academic use is assigned to felipeapfernandes@gmail.com and will expire on August 26, 2017.






    



[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1476546209.log



In [16]:

    
model_kfolds=[]
for train,valid in folds:
    #(train,valid) = folds[i]
    model = gl.logistic_classifier.create(train,
                                                   target='Output',
                                                 features=['country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_player_X1', 'home_player_X2', 'home_player_X3',
       'home_player_X4', 'home_player_X5', 'home_player_X6',
       'home_player_X7', 'home_player_X8', 'home_player_X9',
       'home_player_X10', 'home_player_X11', 'away_player_X1',
       'away_player_X2', 'away_player_X3', 'away_player_X4',
       'away_player_X5', 'away_player_X6', 'away_player_X7',
       'away_player_X8', 'away_player_X9', 'away_player_X10',
       'away_player_X11', 'home_player_Y1', 'home_player_Y2',
       'home_player_Y3', 'home_player_Y4', 'home_player_Y5',
       'home_player_Y6', 'home_player_Y7', 'home_player_Y8',
       'home_player_Y9', 'home_player_Y10', 'home_player_Y11',
       'away_player_Y1', 'away_player_Y2', 'away_player_Y3',
       'away_player_Y4', 'away_player_Y5', 'away_player_Y6',
       'away_player_Y7', 'away_player_Y8', 'away_player_Y9',
       'away_player_Y10', 'away_player_Y11','B365H', 'B365D', 'B365A'],
                                                validation_set=valid)
    model_kfolds.append(model)









    




WARNING: Detected extremely low variance for feature(s) 'home_player_Y2', 'away_player_Y2', 'away_player_Y3' because all entries are nearly the same.
Proceeding with model training using all features. If the model does not provide results of adequate quality, exclude the above mentioned feature(s) from the input dataset.






    




Logistic regression:






    




--------------------------------------------------------






    




Number of examples          : 12597






    




Number of classes           : 3






    




Number of feature columns   : 55






    




Number of unpacked features : 55






    




Number of coefficients    : 112






    




Starting Newton Method






    




--------------------------------------------------------






    




+-----------+----------+--------------+-------------------+---------------------+






    




| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |






    




+-----------+----------+--------------+-------------------+---------------------+






    




| 1         | 2        | 1.183762     | 0.529809          | 0.522540            |






    




| 2         | 3        | 1.291705     | 0.534175          | 0.526984            |






    




| 3         | 4        | 1.398602     | 0.533778          | 0.526984            |






    




| 4         | 5        | 1.526274     | 0.534016          | 0.527302            |






    




| 5         | 6        | 1.640237     | 0.534016          | 0.527302            |






    




+-----------+----------+--------------+-------------------+---------------------+






    




SUCCESS: Optimal solution found.






    











    




WARNING: Detected extremely low variance for feature(s) 'home_player_Y2', 'away_player_Y2', 'away_player_Y3', 'away_player_Y4' because all entries are nearly the same.
Proceeding with model training using all features. If the model does not provide results of adequate quality, exclude the above mentioned feature(s) from the input dataset.






    




Logistic regression:






    




--------------------------------------------------------






    




Number of examples          : 12597






    




Number of classes           : 3






    




Number of feature columns   : 55






    




Number of unpacked features : 55






    




Number of coefficients    : 112






    




Starting Newton Method






    




--------------------------------------------------------






    




+-----------+----------+--------------+-------------------+---------------------+






    




| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |






    




+-----------+----------+--------------+-------------------+---------------------+






    




| 1         | 2        | 0.185095     | 0.535524          | 0.495873            |






    




| 2         | 3        | 0.315123     | 0.539811          | 0.500317            |






    




| 3         | 4        | 0.424920     | 0.540287          | 0.498413            |






    




| 4         | 5        | 0.534586     | 0.540208          | 0.499048            |






    




| 5         | 6        | 0.649053     | 0.540208          | 0.499048            |






    




+-----------+----------+--------------+-------------------+---------------------+






    




SUCCESS: Optimal solution found.






    











    




WARNING: Detected extremely low variance for feature(s) 'home_player_X1', 'home_player_Y1', 'home_player_Y2', 'home_player_Y3', 'away_player_Y2', 'away_player_Y3' because all entries are nearly the same.
Proceeding with model training using all features. If the model does not provide results of adequate quality, exclude the above mentioned feature(s) from the input dataset.






    




Logistic regression:






    




--------------------------------------------------------






    




Number of examples          : 12598






    




Number of classes           : 3






    




Number of feature columns   : 55






    




Number of unpacked features : 55






    




Number of coefficients    : 112






    




Starting Newton Method






    




--------------------------------------------------------






    




+-----------+----------+--------------+-------------------+---------------------+






    




| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |






    




+-----------+----------+--------------+-------------------+---------------------+






    




| 1         | 2        | 0.171700     | 0.530243          | 0.530010            |






    




| 2         | 3        | 0.290582     | 0.534688          | 0.527151            |






    




| 3         | 4        | 0.402009     | 0.535164          | 0.528422            |






    




| 4         | 5        | 0.520443     | 0.535085          | 0.528739            |






    




| 5         | 6        | 0.632263     | 0.535085          | 0.528739            |






    




+-----------+----------+--------------+-------------------+---------------------+






    




SUCCESS: Optimal solution found.






    











    




WARNING: Detected extremely low variance for feature(s) 'home_player_Y2', 'away_player_Y2', 'away_player_Y3' because all entries are nearly the same.
Proceeding with model training using all features. If the model does not provide results of adequate quality, exclude the above mentioned feature(s) from the input dataset.






    




Logistic regression:






    




--------------------------------------------------------






    




Number of examples          : 12598






    




Number of classes           : 3






    




Number of feature columns   : 55






    




Number of unpacked features : 55






    




Number of coefficients    : 112






    




Starting Newton Method






    




--------------------------------------------------------






    




+-----------+----------+--------------+-------------------+---------------------+






    




| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |






    




+-----------+----------+--------------+-------------------+---------------------+






    




| 1         | 2        | 0.172361     | 0.526512          | 0.542394            |






    




| 2         | 3        | 0.285408     | 0.529767          | 0.543347            |






    




| 3         | 4        | 0.401147     | 0.530005          | 0.543347            |






    




| 4         | 5        | 0.524673     | 0.529687          | 0.543347            |






    




| 5         | 6        | 0.635208     | 0.529687          | 0.543347            |






    




+-----------+----------+--------------+-------------------+---------------------+






    




SUCCESS: Optimal solution found.






    











    




WARNING: Detected extremely low variance for feature(s) 'home_player_Y2', 'away_player_Y2', 'away_player_Y3' because all entries are nearly the same.
Proceeding with model training using all features. If the model does not provide results of adequate quality, exclude the above mentioned feature(s) from the input dataset.






    




Logistic regression:






    




--------------------------------------------------------






    




Number of examples          : 12598






    




Number of classes           : 3






    




Number of feature columns   : 55






    




Number of unpacked features : 55






    




Number of coefficients    : 112






    




Starting Newton Method






    




--------------------------------------------------------






    




+-----------+----------+--------------+-------------------+---------------------+






    




| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |






    




+-----------+----------+--------------+-------------------+---------------------+






    




| 1         | 2        | 0.178313     | 0.526433          | 0.544935            |






    




| 2         | 3        | 0.292984     | 0.528655          | 0.544300            |






    




| 3         | 4        | 0.405852     | 0.528576          | 0.543982            |






    




| 4         | 5        | 0.514648     | 0.528497          | 0.543982            |






    




+-----------+----------+--------------+-------------------+---------------------+






    




SUCCESS: Optimal solution found.



In [17]:

    
#result=pure_model.evaluate(test_data)
result=[]
for model in model_kfolds:
    result.append(model.evaluate(test_data))



In [18]:

    
#pure_model.show(view='Evaluation')
result









    Out[18]:





[{'accuracy': 0.5284194134440695,
  'auc': 0.6526528046893878,
  'confusion_matrix': Columns:
  	target_label	str
  	predicted_label	str
  	count	int
  
  Rows: 9
  
  Data:
  +--------------+-----------------+-------+
  | target_label | predicted_label | count |
  +--------------+-----------------+-------+
  |      H       |        D        |   8   |
  |      A       |        D        |   12  |
  |      D       |        D        |   8   |
  |      D       |        H        |  710  |
  |      H       |        H        |  1495 |
  |      H       |        A        |  249  |
  |      D       |        A        |  252  |
  |      A       |        A        |  533  |
  |      A       |        H        |  586  |
  +--------------+-----------------+-------+
  [9 rows x 3 columns],
  'f1_score': 0.3888554069777361,
  'log_loss': 0.9841838473238556,
  'precision': 0.4456128260263131,
  'recall': 0.44427409759320313,
  'roc_curve': Columns:
  	threshold	float
  	fpr	float
  	tpr	float
  	p	int
  	n	int
  	class	int
  
  Rows: 300003
  
  Data:
  +-----------+----------------+-----+------+------+-------+
  | threshold |      fpr       | tpr |  p   |  n   | class |
  +-----------+----------------+-----+------+------+-------+
  |    0.0    |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   1e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   2e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   3e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   4e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   5e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   6e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   7e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   8e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   9e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  +-----------+----------------+-----+------+------+-------+
  [300003 rows x 6 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.},
 {'accuracy': 0.5242668050869452,
  'auc': 0.6532557245341993,
  'confusion_matrix': Columns:
  	target_label	str
  	predicted_label	str
  	count	int
  
  Rows: 9
  
  Data:
  +--------------+-----------------+-------+
  | target_label | predicted_label | count |
  +--------------+-----------------+-------+
  |      A       |        D        |   8   |
  |      D       |        D        |   6   |
  |      H       |        D        |   11  |
  |      H       |        H        |  1505 |
  |      D       |        H        |  717  |
  |      D       |        A        |  247  |
  |      H       |        A        |  236  |
  |      A       |        A        |  509  |
  |      A       |        H        |  614  |
  +--------------+-----------------+-------+
  [9 rows x 3 columns],
  'f1_score': 0.3825432379258673,
  'log_loss': 0.983617354540887,
  'precision': 0.42792728286091264,
  'recall': 0.43841601350513004,
  'roc_curve': Columns:
  	threshold	float
  	fpr	float
  	tpr	float
  	p	int
  	n	int
  	class	int
  
  Rows: 300003
  
  Data:
  +-----------+----------------+-----+------+------+-------+
  | threshold |      fpr       | tpr |  p   |  n   | class |
  +-----------+----------------+-----+------+------+-------+
  |    0.0    |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   1e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   2e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   3e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   4e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   5e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   6e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   7e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   8e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   9e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  +-----------+----------------+-----+------+------+-------+
  [300003 rows x 6 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.},
 {'accuracy': 0.5258240332208669,
  'auc': 0.6565211030211825,
  'confusion_matrix': Columns:
  	target_label	str
  	predicted_label	str
  	count	int
  
  Rows: 9
  
  Data:
  +--------------+-----------------+-------+
  | target_label | predicted_label | count |
  +--------------+-----------------+-------+
  |      H       |        D        |   27  |
  |      A       |        D        |   22  |
  |      D       |        D        |   19  |
  |      D       |        H        |  697  |
  |      H       |        H        |  1489 |
  |      H       |        A        |  236  |
  |      D       |        A        |  254  |
  |      A       |        A        |  518  |
  |      A       |        H        |  591  |
  +--------------+-----------------+-------+
  [9 rows x 3 columns],
  'f1_score': 0.3928292116642802,
  'log_loss': 0.9824502417796135,
  'precision': 0.4431635956106925,
  'recall': 0.4424917473204779,
  'roc_curve': Columns:
  	threshold	float
  	fpr	float
  	tpr	float
  	p	int
  	n	int
  	class	int
  
  Rows: 300003
  
  Data:
  +-----------+----------------+-----+------+------+-------+
  | threshold |      fpr       | tpr |  p   |  n   | class |
  +-----------+----------------+-----+------+------+-------+
  |    0.0    |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   1e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   2e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   3e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   4e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   5e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   6e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   7e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   8e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   9e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  +-----------+----------------+-----+------+------+-------+
  [300003 rows x 6 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.},
 {'accuracy': 0.5263431092655074,
  'auc': 0.6549354976301381,
  'confusion_matrix': Columns:
  	target_label	str
  	predicted_label	str
  	count	int
  
  Rows: 9
  
  Data:
  +--------------+-----------------+-------+
  | target_label | predicted_label | count |
  +--------------+-----------------+-------+
  |      H       |        D        |   18  |
  |      A       |        D        |   18  |
  |      D       |        D        |   21  |
  |      D       |        H        |  724  |
  |      H       |        H        |  1511 |
  |      H       |        A        |  223  |
  |      D       |        A        |  225  |
  |      A       |        A        |  496  |
  |      A       |        H        |  617  |
  +--------------+-----------------+-------+
  [9 rows x 3 columns],
  'f1_score': 0.39178461790825175,
  'log_loss': 0.9824630639160944,
  'precision': 0.4745494760029844,
  'recall': 0.4408807875672823,
  'roc_curve': Columns:
  	threshold	float
  	fpr	float
  	tpr	float
  	p	int
  	n	int
  	class	int
  
  Rows: 300003
  
  Data:
  +-----------+----------------+-----+------+------+-------+
  | threshold |      fpr       | tpr |  p   |  n   | class |
  +-----------+----------------+-----+------+------+-------+
  |    0.0    |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   1e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   2e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   3e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   4e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   5e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   6e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   7e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   8e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   9e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  +-----------+----------------+-----+------+------+-------+
  [300003 rows x 6 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.},
 {'accuracy': 0.5258240332208669,
  'auc': 0.6528051553133635,
  'confusion_matrix': Columns:
  	target_label	str
  	predicted_label	str
  	count	int
  
  Rows: 9
  
  Data:
  +--------------+-----------------+-------+
  | target_label | predicted_label | count |
  +--------------+-----------------+-------+
  |      A       |        D        |   23  |
  |      D       |        D        |   24  |
  |      H       |        D        |   27  |
  |      D       |        H        |  704  |
  |      H       |        H        |  1484 |
  |      H       |        A        |  241  |
  |      D       |        A        |  242  |
  |      A       |        A        |  518  |
  |      A       |        H        |  590  |
  +--------------+-----------------+-------+
  [9 rows x 3 columns],
  'f1_score': 0.3956977849680828,
  'log_loss': 0.985605370988062,
  'precision': 0.4586680353419015,
  'recall': 0.44325866661938423,
  'roc_curve': Columns:
  	threshold	float
  	fpr	float
  	tpr	float
  	p	int
  	n	int
  	class	int
  
  Rows: 300003
  
  Data:
  +-----------+----------------+-----+------+------+-------+
  | threshold |      fpr       | tpr |  p   |  n   | class |
  +-----------+----------------+-----+------+------+-------+
  |    0.0    |      1.0       | 1.0 | 1131 | 2722 |   0   |
  |   1e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   2e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   3e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   4e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   5e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   6e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   7e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   8e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  |   9e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
  +-----------+----------------+-----+------+------+-------+
  [300003 rows x 6 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}]



In [19]:

    
top = model.predict_topk(test_data, output_type='probability', k = 3)



In [20]:

    
print top









    



+----+-------+----------------+
| id | class |  probability   |
+----+-------+----------------+
| 0  |   H   | 0.576467886553 |
| 0  |   D   | 0.234468528222 |
| 0  |   A   | 0.189063585225 |
| 1  |   H   | 0.388531413143 |
| 1  |   A   | 0.356242476119 |
| 1  |   D   | 0.255226110738 |
| 2  |   H   | 0.483082240589 |
| 2  |   D   | 0.272892422591 |
| 2  |   A   | 0.24402533682  |
| 3  |   H   | 0.502684594609 |
+----+-------+----------------+
[11559 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.



In [21]:

    
pred = model.predict(test_data)



In [22]:

    
(test_data['Output']==pred).sum()/(len(pred)*1.0)









    Out[22]:





0.5258240332208669



In [ ]:



In [23]:

    
import re
def def_formations(matches_positions):
    pos=matches_positions.to_dataframe()
    form=[]
    for index,row in pos.iterrows():
        b= row.values
        dfs = (b <= 3).sum()
        mid1 = ((b >= 4) & (b<=6)).sum()
        mid2 = ((b >= 7) & (b<=9)).sum()
        atk1 = ((b >= 10)).sum()
        formation="%d-%d-%d-%d"%(dfs,mid1,mid2,atk1)
        formation = re.sub('0-','',formation)
        form.append(formation)
    return form



In [24]:

    
positions_home= pure_data[['home_player_Y2',
                     'home_player_Y3',
                     'home_player_Y4',
                     'home_player_Y5',
                     'home_player_Y6',
                     'home_player_Y7',
                     'home_player_Y8',
                     'home_player_Y9',
                     'home_player_Y10',
                     'home_player_Y11']]
positions_away = pure_data[['away_player_Y2',
                     'away_player_Y3',
                     'away_player_Y4',
                     'away_player_Y5',
                     'away_player_Y6',
                     'away_player_Y7',
                     'away_player_Y8',
                     'away_player_Y9',
                     'away_player_Y10',
                     'away_player_Y11']]
formation_home=def_formations(positions_home)
formation_away=def_formations(positions_away)



In [25]:

    
pure_data['formation_h']=formation_home
pure_data['formation_a']=formation_away

pure_data['formation_a'].unique()









    Out[25]:





dtype: str
Rows: 19
['3-2-3-2', '3-5-2', '4-2-2-2', '3-1-4-2', '3-4-3', '4-4-2', '4-1-2-3', '4-2-1-3', '3-3-3-1', '4-3-2-1', '4-1-4-1', '4-3-1-2', '4-5-1', '5-3-2', '4-1-3-2', '4-3-3', '4-2-3-1', '3-6-1', '5-4-1']



In [26]:

    
data= pure_data



In [27]:

    
data=pure_data.to_dataframe().drop(['home_player_X1', 'home_player_X2', 'home_player_X3',
       'home_player_X4', 'home_player_X5', 'home_player_X6',
       'home_player_X7', 'home_player_X8', 'home_player_X9',
       'home_player_X10', 'home_player_X11', 'away_player_X1',
       'away_player_X2', 'away_player_X3', 'away_player_X4',
       'away_player_X5', 'away_player_X6', 'away_player_X7',
       'away_player_X8', 'away_player_X9', 'away_player_X10',
       'away_player_X11', 'home_player_Y1', 'home_player_Y2',
       'home_player_Y3', 'home_player_Y4', 'home_player_Y5',
       'home_player_Y6', 'home_player_Y7', 'home_player_Y8',
       'home_player_Y9', 'home_player_Y10', 'home_player_Y11',
       'away_player_Y1', 'away_player_Y2', 'away_player_Y3',
       'away_player_Y4', 'away_player_Y5', 'away_player_Y6',
       'away_player_Y7', 'away_player_Y8', 'away_player_Y9',
       'away_player_Y10', 'away_player_Y11'],1)



In [28]:

    
data = gl.SFrame(data)
data.show()



In [29]:

    
train_data,test_data = data.random_split(.8, seed=0)
train,valid=train_data.random_split(.8,seed=0)
model = gl.logistic_classifier.create(train,target='Output',
                                      features=['country_id','league_id','season','stage','date','match_api_id',
                                                'home_team_api_id','away_team_api_id','B365H','B365D','B365A',
                                                'formation_h','formation_a'], validation_set=valid)









    




Logistic regression:






    




--------------------------------------------------------






    




Number of examples          : 12674






    




Number of classes           : 3






    




Number of feature columns   : 13






    




Number of unpacked features : 13






    




Number of coefficients    : 96






    




Starting Newton Method






    




--------------------------------------------------------






    




+-----------+----------+--------------+-------------------+---------------------+






    




| Iteration | Passes   | Elapsed Time | Training-accuracy | Validation-accuracy |






    




+-----------+----------+--------------+-------------------+---------------------+






    




| 1         | 2        | 0.089409     | 0.530614          | 0.526196            |






    




| 2         | 3        | 0.149522     | 0.534007          | 0.529450            |






    




| 3         | 4        | 0.209564     | 0.533139          | 0.530426            |






    




| 4         | 5        | 0.270951     | 0.533297          | 0.530101            |






    




| 5         | 6        | 0.332927     | 0.533297          | 0.530101            |






    




| 6         | 7        | 0.393710     | 0.533297          | 0.530101            |






    




+-----------+----------+--------------+-------------------+---------------------+



In [30]:

    
model.evaluate(test_data)









    




SUCCESS: Optimal solution found.






    











    Out[30]:





{'accuracy': 0.5315338697119127,
 'auc': 0.6526445952145282,
 'confusion_matrix': Columns:
 	target_label	str
 	predicted_label	str
 	count	int
 
 Rows: 9
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      H       |        D        |   13  |
 |      A       |        D        |   15  |
 |      D       |        D        |   18  |
 |      H       |        H        |  1511 |
 |      D       |        H        |  708  |
 |      D       |        A        |  244  |
 |      H       |        A        |  228  |
 |      A       |        A        |  519  |
 |      A       |        H        |  597  |
 +--------------+-----------------+-------+
 [9 rows x 3 columns],
 'f1_score': 0.39538430285888615,
 'log_loss': 0.9864334331563913,
 'precision': 0.4838648243862084,
 'recall': 0.4466285216833064,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 	class	int
 
 Rows: 300003
 
 Data:
 +-----------+----------------+-----+------+------+-------+
 | threshold |      fpr       | tpr |  p   |  n   | class |
 +-----------+----------------+-----+------+------+-------+
 |    0.0    |      1.0       | 1.0 | 1131 | 2722 |   0   |
 |   1e-05   |      1.0       | 1.0 | 1131 | 2722 |   0   |
 |   2e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
 |   3e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
 |   4e-05   | 0.999632623071 | 1.0 | 1131 | 2722 |   0   |
 |   5e-05   | 0.999265246143 | 1.0 | 1131 | 2722 |   0   |
 |   6e-05   | 0.999265246143 | 1.0 | 1131 | 2722 |   0   |
 |   7e-05   | 0.999265246143 | 1.0 | 1131 | 2722 |   0   |
 |   8e-05   | 0.999265246143 | 1.0 | 1131 | 2722 |   0   |
 |   9e-05   | 0.999265246143 | 1.0 | 1131 | 2722 |   0   |
 +-----------+----------------+-----+------+------+-------+
 [300003 rows x 6 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}



In [31]:

    
pred = model.predict(test_data)
(test_data['Output']==pred).sum()/(len(pred)*1.0)









    Out[31]:





0.5315338697119127

Feature Engineering



In [40]:

    
def ExtractGoalTendency(values):
    #data = values.to_dataframe()
    values.sort_values(by=['league_id','season','stage'])
    return values
    
    
    

data_goals = ExtractGoalTendency(data.to_dataframe())
data_goals









    Out[40]:






  
    
      
      country_id
      league_id
      season
      stage
      date
      match_api_id
      home_team_api_id
      away_team_api_id
      B365H
      B365D
      B365A
      Output
      formation_h
      formation_a
    
  
  
    
      0
      1
      1
      0
      24
      0.0
      493017
      8203
      9987
      3.00
      3.40
      2.30
      H
      4-4-2
      4-4-2
    
    
      1
      1
      1
      0
      25
      9.0
      493025
      9984
      8342
      2.80
      3.20
      2.37
      A
      4-4-2
      4-4-2
    
    
      2
      1
      1
      0
      25
      8.0
      493027
      8635
      10000
      1.40
      4.50
      8.00
      H
      4-4-2
      4-4-2
    
    
      3
      1
      1
      0
      26
      14.0
      493034
      8203
      8635
      4.50
      3.60
      1.75
      H
      4-4-2
      4-4-2
    
    
      4
      1
      1
      0
      26
      15.0
      493040
      10000
      9999
      1.75
      3.50
      4.75
      D
      4-4-2
      4-4-2
    
    
      5
      1
      1
      0
      27
      23.0
      493045
      9991
      10000
      1.65
      3.60
      4.75
      H
      4-4-2
      4-4-2
    
    
      6
      1
      1
      0
      27
      22.0
      493048
      9999
      8203
      2.20
      3.30
      3.00
      H
      4-4-2
      4-4-2
    
    
      7
      1
      1
      0
      29
      44.0
      493061
      8635
      8342
      1.67
      3.50
      5.00
      H
      4-4-2
      4-4-2
    
    
      8
      1
      1
      0
      29
      42.0
      493062
      9999
      9987
      3.20
      3.25
      2.25
      H
      4-4-2
      4-4-2
    
    
      9
      1
      1
      0
      31
      58.0
      493082
      9999
      9991
      3.20
      3.30
      2.10
      A
      4-4-2
      4-4-2
    
    
      10
      1
      1
      0
      32
      64.0
      493089
      10000
      9985
      5.50
      3.60
      1.57
      D
      4-4-2
      4-4-2
    
    
      11
      1
      1
      0
      32
      64.0
      493092
      9991
      9984
      1.36
      4.33
      7.50
      H
      4-4-2
      4-4-2
    
    
      12
      1
      1
      0
      32
      64.0
      493094
      10001
      9999
      1.57
      3.60
      5.50
      H
      4-4-2
      4-4-2
    
    
      13
      1
      1
      0
      33
      71.0
      493097
      9985
      8342
      1.44
      4.33
      7.00
      H
      4-4-2
      4-4-2
    
    
      14
      1
      1
      0
      33
      71.0
      493103
      8635
      9999
      1.14
      7.50
      19.00
      H
      4-4-2
      4-4-2
    
    
      15
      1
      1
      0
      33
      71.0
      493105
      9984
      10001
      2.62
      3.30
      2.62
      H
      4-4-2
      4-4-2
    
    
      16
      1
      1
      0
      34
      78.0
      493106
      9987
      8635
      8.00
      4.00
      1.40
      A
      4-4-2
      4-4-2
    
    
      17
      1
      1
      0
      34
      78.0
      493107
      9991
      9985
      4.33
      3.80
      1.67
      A
      4-4-2
      4-4-2
    
    
      18
      1
      1
      1
      1
      156.0
      665321
      9984
      9991
      2.88
      3.30
      2.25
      A
      4-4-2
      4-4-2
    
    
      19
      1
      1
      1
      1
      155.0
      665322
      9994
      10000
      2.30
      3.30
      2.88
      D
      4-4-2
      4-4-2
    
    
      20
      1
      1
      1
      1
      155.0
      665323
      8571
      8635
      5.75
      3.60
      1.57
      A
      4-4-2
      4-4-2
    
    
      21
      1
      1
      1
      10
      219.0
      665411
      8342
      8635
      2.50
      3.25
      2.60
      H
      4-4-2
      4-4-2
    
    
      22
      1
      1
      1
      10
      218.0
      665417
      8203
      9994
      1.91
      3.30
      4.20
      H
      4-4-2
      4-4-2
    
    
      23
      1
      1
      1
      10
      218.0
      665421
      9993
      10001
      1.73
      3.50
      5.00
      H
      4-4-2
      4-4-2
    
    
      24
      1
      1
      1
      11
      233.0
      665425
      8342
      9991
      1.70
      3.40
      4.75
      H
      4-4-2
      4-4-2
    
    
      25
      1
      1
      1
      11
      233.0
      665427
      9987
      8203
      1.85
      3.30
      4.00
      A
      4-4-2
      4-4-2
    
    
      26
      1
      1
      1
      11
      232.0
      665429
      10000
      9993
      1.91
      3.30
      3.80
      H
      4-4-2
      4-4-2
    
    
      27
      1
      1
      1
      11
      232.0
      665430
      9994
      9984
      2.10
      3.20
      3.30
      D
      4-4-2
      4-4-2
    
    
      28
      1
      1
      1
      11
      232.0
      665435
      10001
      9985
      5.50
      3.50
      1.60
      H
      4-4-2
      4-4-2
    
    
      29
      1
      1
      1
      12
      239.0
      665438
      9985
      10000
      1.50
      4.00
      7.00
      D
      4-4-2
      4-4-2
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      19570
      21484
      21484
      7
      6
      2403.0
      2030140
      9783
      8558
      2.10
      3.30
      3.75
      H
      4-4-2
      4-4-2
    
    
      19571
      21484
      21484
      7
      6
      2403.0
      2030141
      9869
      8603
      2.30
      3.20
      3.30
      A
      4-4-2
      4-2-3-1
    
    
      19572
      21484
      21484
      7
      7
      2409.0
      2030142
      8302
      8634
      4.00
      3.60
      1.91
      H
      4-2-3-1
      4-3-3
    
    
      19573
      21484
      21484
      7
      7
      2408.0
      2030144
      9910
      8305
      1.44
      4.50
      7.50
      D
      4-3-3
      4-2-3-1
    
    
      19574
      21484
      21484
      7
      7
      2410.0
      2030145
      8581
      10205
      4.33
      3.40
      1.91
      H
      5-3-2
      4-4-2
    
    
      19575
      21484
      21484
      7
      7
      2410.0
      2030146
      9906
      8633
      3.20
      3.30
      2.30
      D
      4-4-2
      4-3-3
    
    
      19576
      21484
      21484
      7
      7
      2409.0
      2030147
      9864
      8560
      2.38
      3.10
      3.30
      H
      4-2-3-1
      4-2-3-1
    
    
      19577
      21484
      21484
      7
      7
      2410.0
      2030148
      8315
      10267
      2.30
      3.30
      3.20
      H
      4-2-3-1
      4-2-3-1
    
    
      19578
      21484
      21484
      7
      7
      2409.0
      2030149
      7878
      9783
      2.60
      3.20
      2.80
      D
      4-2-3-1
      4-4-2
    
    
      19579
      21484
      21484
      7
      7
      2409.0
      2030150
      8558
      9869
      2.00
      3.30
      4.00
      A
      4-2-3-1
      4-2-3-1
    
    
      19580
      21484
      21484
      7
      7
      2410.0
      2030151
      8370
      8603
      2.20
      3.50
      3.20
      A
      4-2-3-1
      4-4-2
    
    
      19581
      21484
      21484
      7
      8
      2423.0
      2030152
      8634
      8370
      1.14
      9.00
      15.00
      H
      4-3-3
      4-2-3-1
    
    
      19582
      21484
      21484
      7
      8
      2423.0
      2030153
      8372
      8302
      3.60
      3.40
      2.10
      D
      4-2-3-1
      4-2-3-1
    
    
      19583
      21484
      21484
      7
      8
      2424.0
      2030155
      10205
      9910
      2.00
      3.50
      3.75
      A
      4-4-2
      4-3-3
    
    
      19584
      21484
      21484
      7
      8
      2423.0
      2030156
      8633
      8581
      1.08
      12.00
      23.00
      H
      4-3-3
      5-3-2
    
    
      19585
      21484
      21484
      7
      8
      2424.0
      2030157
      8560
      9906
      4.00
      3.20
      2.05
      A
      4-2-3-1
      4-4-2
    
    
      19586
      21484
      21484
      7
      8
      2423.0
      2030158
      10267
      9864
      1.62
      3.75
      6.00
      H
      4-2-3-1
      4-2-3-1
    
    
      19587
      21484
      21484
      7
      8
      2424.0
      2030159
      9783
      8315
      2.70
      3.10
      2.80
      D
      4-4-2
      4-2-3-1
    
    
      19588
      21484
      21484
      7
      8
      2425.0
      2030160
      9869
      7878
      2.00
      3.20
      4.20
      D
      4-2-3-1
      4-1-4-1
    
    
      19589
      21484
      21484
      7
      8
      2423.0
      2030161
      8603
      8558
      2.00
      3.30
      4.00
      A
      4-2-3-1
      4-2-3-1
    
    
      19590
      21484
      21484
      7
      9
      2431.0
      2030162
      8634
      8372
      1.11
      10.00
      19.00
      H
      4-3-3
      4-2-3-1
    
    
      19591
      21484
      21484
      7
      9
      2430.0
      2030163
      8302
      8305
      1.44
      4.33
      8.00
      H
      4-2-3-1
      4-2-3-1
    
    
      19592
      21484
      21484
      7
      9
      2431.0
      2030164
      8306
      10205
      3.50
      3.25
      2.20
      D
      4-2-3-1
      4-4-2
    
    
      19593
      21484
      21484
      7
      9
      2430.0
      2030165
      9910
      8633
      3.80
      3.80
      1.91
      A
      4-3-3
      4-3-3
    
    
      19594
      21484
      21484
      7
      9
      2431.0
      2030166
      8581
      8560
      2.63
      3.20
      2.80
      A
      5-3-2
      4-2-3-1
    
    
      19595
      21484
      21484
      7
      9
      2431.0
      2030167
      9906
      10267
      1.57
      3.80
      6.50
      H
      4-3-3
      4-4-2
    
    
      19596
      21484
      21484
      7
      9
      2430.0
      2030168
      9864
      9783
      2.25
      3.25
      3.40
      H
      4-2-3-1
      4-4-2
    
    
      19597
      21484
      21484
      7
      9
      2432.0
      2030169
      8315
      9869
      1.53
      4.00
      7.00
      H
      4-2-3-1
      4-3-3
    
    
      19598
      21484
      21484
      7
      9
      2430.0
      2030170
      7878
      8603
      2.30
      3.25
      3.25
      D
      4-3-3
      4-4-2
    
    
      19599
      21484
      21484
      7
      9
      2429.0
      2030171
      8370
      8558
      2.20
      3.40
      3.20
      H
      4-2-3-1
      4-2-3-1
    
  

19600 rows × 14 columns

	id	country_id	league_id	season	stage	date	match_api_id	home_team_api_id	away_team_api_id	home_team_goal	...	SJA	VCH	VCD	VCA	GBH	GBD	GBA	BSH	BSD	BSA
0	146	1	1	2008/2009	24	2009-02-27 00:00:00	493017	8203	9987	2	...	2.30	2.65	3.25	2.35	2.90	3.25	2.30	2.80	3.20	2.25
1	154	1	1	2008/2009	25	2009-03-08 00:00:00	493025	9984	8342	1	...	2.25	2.65	3.20	2.35	2.90	3.20	2.30	2.62	3.20	2.38
2	156	1	1	2008/2009	25	2009-03-07 00:00:00	493027	8635	10000	2	...	8.50	1.30	4.35	8.00	1.35	4.33	8.50	1.36	4.20	7.00
3	163	1	1	2008/2009	26	2009-03-13 00:00:00	493034	8203	8635	2	...	1.73	4.35	3.30	1.75	4.50	3.40	1.75	4.20	3.30	1.75
4	169	1	1	2008/2009	26	2009-03-14 00:00:00	493040	10000	9999	0	...	5.00	1.65	3.50	4.50	1.65	3.50	5.00	1.70	3.40	4.33
5	174	1	1	2008/2009	27	2009-03-22 00:00:00	493045	9991	10000	1	...	4.75	1.60	3.40	5.00	1.65	3.40	5.00	1.62	3.50	5.00
6	177	1	1	2008/2009	27	2009-03-21 00:00:00	493048	9999	8203	1	...	2.75	2.25	3.25	2.80	2.10	3.25	3.15	2.25	3.20	2.75
7	190	1	1	2008/2009	29	2009-04-12 00:00:00	493061	8635	8342	1	...	4.75	1.65	3.40	5.00	1.70	3.40	4.50	1.73	3.40	4.20
8	191	1	1	2008/2009	29	2009-04-10 00:00:00	493062	9999	9987	1	...	2.15	2.80	3.25	2.25	3.20	3.20	2.10	2.80	3.20	2.25
9	220	1	1	2008/2009	31	2009-04-26 00:00:00	493082	9999	9991	1	...	2.20	3.20	3.20	2.10	3.00	3.25	2.15	2.88	3.25	2.20
10	227	1	1	2008/2009	32	2009-05-02 00:00:00	493089	10000	9985	0	...	1.62	5.00	3.50	1.60	5.25	3.50	1.60	5.00	3.60	1.57
11	230	1	1	2008/2009	32	2009-05-02 00:00:00	493092	9991	9984	2	...	8.50	1.35	4.00	8.50	1.35	4.50	7.00	1.33	4.50	7.50
12	232	1	1	2008/2009	32	2009-05-02 00:00:00	493094	10001	9999	1	...	5.50	1.55	3.50	5.50	1.57	3.60	5.25	1.57	3.60	5.00
13	235	1	1	2008/2009	33	2009-05-09 00:00:00	493097	9985	8342	2	...	7.50	1.45	3.80	6.50	1.47	3.75	6.50	1.50	3.75	5.50
14	241	1	1	2008/2009	33	2009-05-09 00:00:00	493103	8635	9999	3	...	16.00	1.14	6.50	13.00	1.15	6.50	13.00	1.17	6.00	13.00
15	243	1	1	2008/2009	33	2009-05-09 00:00:00	493105	9984	10001	2	...	2.63	2.60	3.25	2.40	2.50	3.30	2.50	2.50	3.20	2.50
16	244	1	1	2008/2009	34	2009-05-16 00:00:00	493106	9987	8635	0	...	1.45	6.50	4.00	1.44	6.50	3.75	1.45	7.00	4.00	1.40
17	245	1	1	2008/2009	34	2009-05-16 00:00:00	493107	9991	9985	0	...	1.70	4.50	3.60	1.65	4.50	3.60	1.65	4.50	3.50	1.67
18	309	1	1	2009/2010	1	2009-08-02 00:00:00	665321	9984	9991	1	...	2.30	2.62	3.25	2.40	2.70	3.20	2.40	2.88	3.10	2.30
19	310	1	1	2009/2010	1	2009-08-01 00:00:00	665322	9994	10000	1	...	3.30	2.00	3.25	3.40	2.10	3.30	3.10	2.25	3.20	2.80
20	311	1	1	2009/2010	1	2009-08-01 00:00:00	665323	8571	8635	0	...	1.57	5.50	3.60	1.57	5.25	3.50	1.60	5.50	3.60	1.53
21	314	1	1	2009/2010	10	2009-10-04 00:00:00	665411	8342	8635	4	...	2.75	2.40	3.25	2.62	2.45	3.20	2.70	2.38	3.20	2.62
22	318	1	1	2009/2010	10	2009-10-03 00:00:00	665417	8203	9994	2	...	4.00	1.85	3.30	3.75	1.95	3.25	3.75	1.91	3.25	3.60
23	320	1	1	2009/2010	10	2009-10-03 00:00:00	665421	9993	10001	3	...	4.50	1.73	3.40	4.33	1.75	3.40	4.50	1.75	3.30	4.20
24	321	1	1	2009/2010	11	2009-10-18 00:00:00	665425	8342	9991	1	...	4.75	1.70	3.50	4.00	1.60	3.50	5.25	1.67	3.40	4.75
25	323	1	1	2009/2010	11	2009-10-18 00:00:00	665427	9987	8203	1	...	4.33	1.75	3.40	4.33	1.83	3.30	4.00	1.83	3.30	3.80
26	324	1	1	2009/2010	11	2009-10-17 00:00:00	665429	10000	9993	4	...	3.60	1.90	3.30	3.60	2.05	3.25	3.30	2.00	3.25	3.30
27	325	1	1	2009/2010	11	2009-10-17 00:00:00	665430	9994	9984	1	...	3.40	2.00	3.25	3.40	2.10	3.20	3.20	2.05	3.25	3.20
28	327	1	1	2009/2010	11	2009-10-17 00:00:00	665435	10001	9985	2	...	1.67	5.50	3.40	1.60	5.00	3.40	1.65	4.75	3.40	1.67
29	328	1	1	2009/2010	12	2009-10-24 00:00:00	665438	9985	10000	1	...	6.50	1.44	3.75	6.50	1.40	4.00	7.50	1.44	3.75	6.50
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
19570	24492	21484	21484	2015/2016	6	2015-09-27 00:00:00	2030140	9783	8558	3	...	NaN	2.10	3.40	3.90	NaN	NaN	NaN	NaN	NaN	NaN
19571	24493	21484	21484	2015/2016	6	2015-09-27 00:00:00	2030141	9869	8603	1	...	NaN	2.30	3.30	3.40	NaN	NaN	NaN	NaN	NaN	NaN
19572	24494	21484	21484	2015/2016	7	2015-10-03 00:00:00	2030142	8302	8634	2	...	NaN	4.10	3.90	1.91	NaN	NaN	NaN	NaN	NaN	NaN
19573	24496	21484	21484	2015/2016	7	2015-10-02 00:00:00	2030144	9910	8305	0	...	NaN	1.50	4.50	7.50	NaN	NaN	NaN	NaN	NaN	NaN
19574	24497	21484	21484	2015/2016	7	2015-10-04 00:00:00	2030145	8581	10205	1	...	NaN	4.40	3.60	1.93	NaN	NaN	NaN	NaN	NaN	NaN
19575	24498	21484	21484	2015/2016	7	2015-10-04 00:00:00	2030146	9906	8633	1	...	NaN	3.13	3.40	2.45	NaN	NaN	NaN	NaN	NaN	NaN
19576	24499	21484	21484	2015/2016	7	2015-10-03 00:00:00	2030147	9864	8560	3	...	NaN	2.40	3.25	3.30	NaN	NaN	NaN	NaN	NaN	NaN
19577	24500	21484	21484	2015/2016	7	2015-10-04 00:00:00	2030148	8315	10267	3	...	NaN	2.38	3.40	3.25	NaN	NaN	NaN	NaN	NaN	NaN
19578	24501	21484	21484	2015/2016	7	2015-10-03 00:00:00	2030149	7878	9783	1	...	NaN	2.63	3.25	3.00	NaN	NaN	NaN	NaN	NaN	NaN
19579	24502	21484	21484	2015/2016	7	2015-10-03 00:00:00	2030150	8558	9869	1	...	NaN	2.00	3.40	4.30	NaN	NaN	NaN	NaN	NaN	NaN
19580	24503	21484	21484	2015/2016	7	2015-10-04 00:00:00	2030151	8370	8603	0	...	NaN	2.20	3.80	3.30	NaN	NaN	NaN	NaN	NaN	NaN
19581	24504	21484	21484	2015/2016	8	2015-10-17 00:00:00	2030152	8634	8370	5	...	NaN	1.15	9.50	19.00	NaN	NaN	NaN	NaN	NaN	NaN
19582	24505	21484	21484	2015/2016	8	2015-10-17 00:00:00	2030153	8372	8302	1	...	NaN	3.60	3.50	2.20	NaN	NaN	NaN	NaN	NaN	NaN
19583	24507	21484	21484	2015/2016	8	2015-10-18 00:00:00	2030155	10205	9910	1	...	NaN	2.05	3.70	3.75	NaN	NaN	NaN	NaN	NaN	NaN
19584	24508	21484	21484	2015/2016	8	2015-10-17 00:00:00	2030156	8633	8581	3	...	NaN	1.08	13.00	34.00	NaN	NaN	NaN	NaN	NaN	NaN
19585	24509	21484	21484	2015/2016	8	2015-10-18 00:00:00	2030157	8560	9906	0	...	NaN	4.30	3.30	2.05	NaN	NaN	NaN	NaN	NaN	NaN
19586	24510	21484	21484	2015/2016	8	2015-10-17 00:00:00	2030158	10267	9864	3	...	NaN	1.62	4.10	6.25	NaN	NaN	NaN	NaN	NaN	NaN
19587	24511	21484	21484	2015/2016	8	2015-10-18 00:00:00	2030159	9783	8315	2	...	NaN	2.70	3.25	2.88	NaN	NaN	NaN	NaN	NaN	NaN
19588	24512	21484	21484	2015/2016	8	2015-10-19 00:00:00	2030160	9869	7878	3	...	NaN	2.00	3.40	4.33	NaN	NaN	NaN	NaN	NaN	NaN
19589	24513	21484	21484	2015/2016	8	2015-10-17 00:00:00	2030161	8603	8558	1	...	NaN	2.05	3.50	4.00	NaN	NaN	NaN	NaN	NaN	NaN
19590	24514	21484	21484	2015/2016	9	2015-10-25 00:00:00	2030162	8634	8372	3	...	NaN	1.11	11.00	31.00	NaN	NaN	NaN	NaN	NaN	NaN
19591	24515	21484	21484	2015/2016	9	2015-10-24 00:00:00	2030163	8302	8305	5	...	NaN	1.50	4.60	7.50	NaN	NaN	NaN	NaN	NaN	NaN
19592	24516	21484	21484	2015/2016	9	2015-10-25 00:00:00	2030164	8306	10205	0	...	NaN	3.60	3.40	2.20	NaN	NaN	NaN	NaN	NaN	NaN
19593	24517	21484	21484	2015/2016	9	2015-10-24 00:00:00	2030165	9910	8633	1	...	NaN	3.90	4.00	1.93	NaN	NaN	NaN	NaN	NaN	NaN
19594	24518	21484	21484	2015/2016	9	2015-10-25 00:00:00	2030166	8581	8560	0	...	NaN	2.75	3.13	2.90	NaN	NaN	NaN	NaN	NaN	NaN
19595	24519	21484	21484	2015/2016	9	2015-10-25 00:00:00	2030167	9906	10267	2	...	NaN	1.57	4.00	7.00	NaN	NaN	NaN	NaN	NaN	NaN
19596	24520	21484	21484	2015/2016	9	2015-10-24 00:00:00	2030168	9864	9783	2	...	NaN	2.30	3.40	3.40	NaN	NaN	NaN	NaN	NaN	NaN
19597	24521	21484	21484	2015/2016	9	2015-10-26 00:00:00	2030169	8315	9869	3	...	NaN	1.55	4.20	7.00	NaN	NaN	NaN	NaN	NaN	NaN
19598	24522	21484	21484	2015/2016	9	2015-10-24 00:00:00	2030170	7878	8603	1	...	NaN	2.30	3.40	3.30	NaN	NaN	NaN	NaN	NaN	NaN
19599	24523	21484	21484	2015/2016	9	2015-10-23 00:00:00	2030171	8370	8558	3	...	NaN	2.20	3.60	3.50	NaN	NaN	NaN	NaN	NaN	NaN

	player_api_id	player_api_id	date_stat
0	505942	505942	2016-02-18 00:00:00
1	505942	505942	2015-11-19 00:00:00
2	505942	505942	2015-09-21 00:00:00
3	505942	505942	2015-03-20 00:00:00
4	505942	505942	2007-02-22 00:00:00
5	155782	155782	2016-04-21 00:00:00
6	155782	155782	2016-04-07 00:00:00
7	155782	155782	2016-01-07 00:00:00
8	155782	155782	2015-12-24 00:00:00
9	155782	155782	2015-12-17 00:00:00
10	155782	155782	2015-10-16 00:00:00
11	155782	155782	2015-09-25 00:00:00
12	155782	155782	2015-09-21 00:00:00
13	155782	155782	2015-01-09 00:00:00
14	155782	155782	2014-12-05 00:00:00
15	155782	155782	2014-11-07 00:00:00
16	155782	155782	2014-09-18 00:00:00
17	155782	155782	2014-05-02 00:00:00
18	155782	155782	2014-04-04 00:00:00
19	155782	155782	2014-03-14 00:00:00
20	155782	155782	2013-12-13 00:00:00
21	155782	155782	2013-11-08 00:00:00
22	155782	155782	2013-10-04 00:00:00
23	155782	155782	2013-09-20 00:00:00
24	155782	155782	2013-05-03 00:00:00
25	155782	155782	2013-03-22 00:00:00
26	155782	155782	2013-03-15 00:00:00
27	155782	155782	2013-02-22 00:00:00
28	155782	155782	2013-02-15 00:00:00
29	155782	155782	2012-08-31 00:00:00
...	...	...	...
183223	108760	108760	2014-09-18 00:00:00
183224	108760	108760	2013-12-06 00:00:00
183225	108760	108760	2013-11-22 00:00:00
183226	108760	108760	2013-09-20 00:00:00
183227	108760	108760	2009-08-30 00:00:00
183228	108760	108760	2007-02-22 00:00:00
183229	39494	39494	2016-04-14 00:00:00
183230	39494	39494	2016-02-11 00:00:00
183231	39494	39494	2015-10-09 00:00:00
183232	39494	39494	2015-09-21 00:00:00
183233	39494	39494	2015-01-09 00:00:00
183234	39494	39494	2014-10-31 00:00:00
183235	39494	39494	2014-09-18 00:00:00
183236	39494	39494	2014-03-28 00:00:00
183237	39494	39494	2014-03-14 00:00:00
183238	39494	39494	2014-02-28 00:00:00
183239	39494	39494	2013-09-20 00:00:00
183240	39494	39494	2013-04-26 00:00:00
183241	39494	39494	2013-02-15 00:00:00
183242	39494	39494	2012-08-31 00:00:00
183243	39494	39494	2012-02-22 00:00:00
183244	39494	39494	2011-08-30 00:00:00
183245	39494	39494	2011-02-22 00:00:00
183246	39494	39494	2010-08-30 00:00:00
183247	39494	39494	2010-02-22 00:00:00
183248	39494	39494	2009-08-30 00:00:00
183249	39494	39494	2009-02-22 00:00:00
183250	39494	39494	2008-08-30 00:00:00
183251	39494	39494	2007-08-30 00:00:00
183252	39494	39494	2007-02-22 00:00:00