notebook.community

Edit and run



In [167]:

    
#points matplot lib to plot the figure inside notebook
#pandas is a data science library
#matplotlib: for data plotting
#matplotlib.pyplot : to define a plot, part of 
%matplotlib inline 
import pandas as pd 
import matplotlib 
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.cross_validation import train_test_split
from sklearn import linear_model
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVC
from sklearn import tree



In [168]:

    
#pandas work with data frames
#read_csv

df = pd.read_csv('ScoreBoardFinal.csv')
df.head(10)









    Out[168]:







  
    
      
      MId
      Home_Team
      Home_Team_Goals
      Away_Team_Goals
      Away_Team
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      ...
      HTP
      HTR
      HTAR
      HTMR
      HTDR
      ATP
      ATR
      ATAR
      ATMR
      ATDR
    
  
  
    
      0
      9601
      CAR
      1
      2
      CHE
      36.8
      63.2
      3
      7
      10
      ...
      14
      775.406
      151.256
      269.512
      354.638
      14
      883.673
      166.000
      315.499
      402.174
    
    
      1
      9602
      FUL
      2
      2
      CRY
      66.1
      33.9
      5
      6
      15
      ...
      14
      793.834
      182.616
      210.218
      401.000
      14
      790.224
      144.288
      287.936
      358.000
    
    
      2
      9603
      HUL
      0
      2
      EVE
      43
      57
      3
      4
      12
      ...
      14
      792.189
      216.189
      219.000
      357.000
      14
      862.968
      173.566
      293.402
      396.000
    
    
      3
      9604
      LIV
      2
      1
      NUFC
      66.2
      33.8
      5
      2
      13
      ...
      14
      884.043
      269.651
      220.392
      394.000
      14
      827.956
      84.175
      294.260
      449.521
    
    
      4
      9605
      MCFC
      2
      0
      WHU
      68.1
      31.9
      7
      0
      28
      ...
      14
      912.543
      155.313
      350.230
      407.000
      14
      808.489
      129.325
      315.164
      364.000
    
    
      5
      9606
      NOR
      0
      2
      ARS
      37.7
      62.3
      5
      8
      11
      ...
      14
      805.636
      93.648
      341.988
      370.000
      14
      869.152
      230.624
      252.528
      386.000
    
    
      6
      9607
      SOT
      1
      1
      MUFC
      58.5
      41.5
      6
      2
      15
      ...
      14
      830.718
      157.239
      296.034
      377.445
      14
      883.726
      251.335
      229.391
      403.000
    
    
      7
      9608
      SUN
      1
      3
      SWAN
      53.3
      46.7
      4
      4
      20
      ...
      14
      798.745
      76.958
      361.257
      360.530
      14
      795.936
      278.427
      163.175
      354.334
    
    
      8
      9609
      TOT
      3
      0
      AVL
      54.7
      45.3
      6
      1
      12
      ...
      14
      841.663
      148.000
      291.936
      401.727
      13
      800.032
      181.180
      183.000
      435.852
    
    
      9
      9610
      WBA
      1
      2
      STK
      43
      57
      4
      4
      18
      ...
      14
      810.201
      166.403
      349.500
      294.298
      14
      793.845
      85.944
      349.901
      358.000
    
  

10 rows × 33 columns



In [169]:

    
#.loc returns the rows based on the filters or conditions
df_tmp = df.loc[df['Home_ShotsT'] != 'XX-XX-']
df_tmp.head(10)









    Out[169]:







  
    
      
      MId
      Home_Team
      Home_Team_Goals
      Away_Team_Goals
      Away_Team
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      ...
      HTP
      HTR
      HTAR
      HTMR
      HTDR
      ATP
      ATR
      ATAR
      ATMR
      ATDR
    
  
  
    
      0
      9601
      CAR
      1
      2
      CHE
      36.8
      63.2
      3
      7
      10
      ...
      14
      775.406
      151.256
      269.512
      354.638
      14
      883.673
      166.000
      315.499
      402.174
    
    
      1
      9602
      FUL
      2
      2
      CRY
      66.1
      33.9
      5
      6
      15
      ...
      14
      793.834
      182.616
      210.218
      401.000
      14
      790.224
      144.288
      287.936
      358.000
    
    
      2
      9603
      HUL
      0
      2
      EVE
      43
      57
      3
      4
      12
      ...
      14
      792.189
      216.189
      219.000
      357.000
      14
      862.968
      173.566
      293.402
      396.000
    
    
      3
      9604
      LIV
      2
      1
      NUFC
      66.2
      33.8
      5
      2
      13
      ...
      14
      884.043
      269.651
      220.392
      394.000
      14
      827.956
      84.175
      294.260
      449.521
    
    
      4
      9605
      MCFC
      2
      0
      WHU
      68.1
      31.9
      7
      0
      28
      ...
      14
      912.543
      155.313
      350.230
      407.000
      14
      808.489
      129.325
      315.164
      364.000
    
    
      5
      9606
      NOR
      0
      2
      ARS
      37.7
      62.3
      5
      8
      11
      ...
      14
      805.636
      93.648
      341.988
      370.000
      14
      869.152
      230.624
      252.528
      386.000
    
    
      6
      9607
      SOT
      1
      1
      MUFC
      58.5
      41.5
      6
      2
      15
      ...
      14
      830.718
      157.239
      296.034
      377.445
      14
      883.726
      251.335
      229.391
      403.000
    
    
      7
      9608
      SUN
      1
      3
      SWAN
      53.3
      46.7
      4
      4
      20
      ...
      14
      798.745
      76.958
      361.257
      360.530
      14
      795.936
      278.427
      163.175
      354.334
    
    
      8
      9609
      TOT
      3
      0
      AVL
      54.7
      45.3
      6
      1
      12
      ...
      14
      841.663
      148.000
      291.936
      401.727
      13
      800.032
      181.180
      183.000
      435.852
    
    
      9
      9610
      WBA
      1
      2
      STK
      43
      57
      4
      4
      18
      ...
      14
      810.201
      166.403
      349.500
      294.298
      14
      793.845
      85.944
      349.901
      358.000
    
  

10 rows × 33 columns



In [170]:

    
df = df_tmp.ix[:,'Home_Poss':]
df = df.apply(lambda x: pd.to_numeric(x, errors = 'ignore'))
names = df.columns
correlations = df.corr()
# plot correlation matrix
correlations









    



C:\Users\I336006\AppData\Local\Continuum\Anaconda2\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.






    Out[170]:







  
    
      
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      Away_Shots
      Home_Touches
      Away_Touches
      Home_Passes
      Away_Passes
      ...
      HTP
      HTR
      HTAR
      HTMR
      HTDR
      ATP
      ATR
      ATAR
      ATMR
      ATDR
    
  
  
    
      Home_Poss
      1.000000
      -1.000000
      0.316120
      -0.315443
      0.524575
      -0.475808
      0.895654
      -0.897972
      0.877316
      -0.884429
      ...
      0.025225
      0.464898
      0.190234
      0.042298
      0.065889
      0.005962
      -0.473204
      -0.240212
      0.003324
      -0.066883
    
    
      Away_Poss
      -1.000000
      1.000000
      -0.316120
      0.315443
      -0.524575
      0.475808
      -0.895654
      0.897972
      -0.877316
      0.884429
      ...
      -0.025225
      -0.464898
      -0.190234
      -0.042298
      -0.065889
      -0.005962
      0.473204
      0.240212
      -0.003324
      0.066883
    
    
      Home_ShotsT
      0.316120
      -0.316120
      1.000000
      -0.173285
      0.665978
      -0.250153
      0.332443
      -0.279791
      0.315805
      -0.266711
      ...
      0.053084
      0.330653
      0.133486
      0.005930
      0.086834
      0.070499
      -0.219733
      -0.112282
      0.001539
      -0.029892
    
    
      Away_ShotsT
      -0.315443
      0.315443
      -0.173285
      1.000000
      -0.229451
      0.630199
      -0.277174
      0.326558
      -0.284377
      0.299902
      ...
      0.027838
      -0.195858
      -0.073407
      -0.008440
      -0.052068
      -0.010597
      0.323539
      0.106688
      0.010279
      0.115561
    
    
      Home_Shots
      0.524575
      -0.524575
      0.665978
      -0.229451
      1.000000
      -0.354441
      0.461211
      -0.493154
      0.419769
      -0.493644
      ...
      -0.014093
      0.309978
      0.139142
      0.010991
      0.052610
      0.027267
      -0.305942
      -0.119271
      -0.040202
      -0.031136
    
    
      Away_Shots
      -0.475808
      0.475808
      -0.250153
      0.630199
      -0.354441
      1.000000
      -0.436438
      0.438792
      -0.443691
      0.396951
      ...
      0.006007
      -0.274016
      -0.086549
      -0.057355
      -0.025990
      -0.031674
      0.302355
      0.131967
      -0.009907
      0.089040
    
    
      Home_Touches
      0.895654
      -0.895654
      0.332443
      -0.277174
      0.461211
      -0.436438
      1.000000
      -0.646915
      0.980318
      -0.625277
      ...
      0.038890
      0.558134
      0.213400
      0.029445
      0.133905
      0.019861
      -0.327717
      -0.200768
      0.018656
      -0.018890
    
    
      Away_Touches
      -0.897972
      0.897972
      -0.279791
      0.326558
      -0.493154
      0.438792
      -0.646915
      1.000000
      -0.625229
      0.977526
      ...
      -0.004221
      -0.320504
      -0.153847
      -0.040757
      0.005563
      0.003360
      0.554541
      0.247586
      0.010373
      0.108410
    
    
      Home_Passes
      0.877316
      -0.877316
      0.315805
      -0.284377
      0.419769
      -0.443691
      0.980318
      -0.625229
      1.000000
      -0.577590
      ...
      0.052318
      0.561553
      0.199638
      0.044998
      0.132922
      0.020404
      -0.323569
      -0.204948
      0.029555
      -0.026147
    
    
      Away_Passes
      -0.884429
      0.884429
      -0.266711
      0.299902
      -0.493644
      0.396951
      -0.625277
      0.977526
      -0.577590
      1.000000
      ...
      0.001624
      -0.295116
      -0.156138
      -0.025143
      0.007073
      0.007578
      0.540404
      0.227474
      0.025252
      0.102752
    
    
      Home_Tackles
      -0.172459
      0.172459
      -0.012261
      0.070435
      -0.085394
      0.047551
      -0.074807
      0.246762
      -0.134380
      0.157641
      ...
      0.013462
      0.077214
      0.033283
      -0.007918
      0.031578
      -0.013654
      0.200828
      0.035920
      0.054348
      0.041557
    
    
      Away_Tackles
      0.169691
      -0.169691
      -0.019757
      -0.001110
      0.003944
      -0.107036
      0.232605
      -0.079300
      0.139685
      -0.147408
      ...
      -0.014019
      0.164441
      0.003048
      0.047440
      0.066700
      0.011347
      0.052493
      -0.009029
      0.029952
      0.014225
    
    
      Home_Clearances
      -0.277403
      0.277403
      -0.095408
      0.022782
      -0.188163
      0.262887
      -0.269608
      0.143354
      -0.346759
      0.095620
      ...
      -0.045484
      -0.197748
      0.033539
      -0.135742
      -0.012801
      -0.020916
      -0.002795
      0.084873
      -0.095043
      0.018308
    
    
      Away_Clearances
      0.214161
      -0.214161
      -0.010117
      -0.027719
      0.215589
      -0.118271
      0.073087
      -0.200855
      0.010092
      -0.306799
      ...
      -0.036738
      -0.167104
      0.043588
      -0.081123
      -0.084927
      -0.008852
      -0.107777
      0.045625
      -0.110708
      0.008021
    
    
      Home_Corners
      0.441836
      -0.441836
      0.267085
      -0.183954
      0.495146
      -0.317978
      0.335527
      -0.414290
      0.295668
      -0.436616
      ...
      -0.027674
      0.198027
      0.057102
      0.023322
      0.054729
      -0.027624
      -0.228946
      -0.071335
      -0.037770
      -0.038928
    
    
      Away_Corners
      -0.362764
      0.362764
      -0.163933
      0.244439
      -0.261911
      0.498755
      -0.354564
      0.267462
      -0.374861
      0.228546
      ...
      0.015213
      -0.132627
      -0.026517
      -0.062478
      0.018675
      -0.046770
      0.210584
      0.087795
      0.029492
      0.009556
    
    
      Home_Offsides
      -0.017331
      0.017331
      0.026309
      -0.024810
      -0.018583
      -0.049601
      -0.004910
      0.002334
      -0.020296
      0.006835
      ...
      0.058818
      0.044848
      0.033794
      -0.006883
      0.000802
      -0.007569
      0.059502
      -0.037806
      0.022949
      0.077124
    
    
      Away_Offsides
      -0.016569
      0.016569
      0.011807
      0.038690
      -0.009970
      -0.032591
      -0.027856
      0.016967
      -0.027339
      -0.002533
      ...
      -0.006855
      -0.016913
      -0.005802
      -0.032585
      0.043951
      0.009871
      0.051386
      0.031946
      0.001091
      -0.004269
    
    
      HTP
      0.025225
      -0.025225
      0.053084
      0.027838
      -0.014093
      0.006007
      0.038890
      -0.004221
      0.052318
      0.001624
      ...
      1.000000
      0.056394
      -0.062121
      0.083333
      0.011258
      0.033470
      -0.054241
      -0.053437
      0.013525
      0.011634
    
    
      HTR
      0.464898
      -0.464898
      0.330653
      -0.195858
      0.309978
      -0.274016
      0.558134
      -0.320504
      0.561553
      -0.295116
      ...
      0.056394
      1.000000
      0.204030
      0.143205
      0.359908
      0.001965
      -0.014041
      -0.116419
      0.083728
      0.033848
    
    
      HTAR
      0.190234
      -0.190234
      0.133486
      -0.073407
      0.139142
      -0.086549
      0.213400
      -0.153847
      0.199638
      -0.156138
      ...
      -0.062121
      0.204030
      1.000000
      -0.748693
      -0.120574
      0.042061
      -0.054006
      -0.033736
      0.024187
      -0.036268
    
    
      HTMR
      0.042298
      -0.042298
      0.005930
      -0.008440
      0.010991
      -0.057355
      0.029445
      -0.040757
      0.044998
      -0.025143
      ...
      0.083333
      0.143205
      -0.748693
      1.000000
      -0.328857
      -0.031348
      0.035144
      -0.006556
      0.008865
      0.028434
    
    
      HTDR
      0.065889
      -0.065889
      0.086834
      -0.052068
      0.052610
      -0.025990
      0.133905
      0.005563
      0.132922
      0.007073
      ...
      0.011258
      0.359908
      -0.120574
      -0.328857
      1.000000
      -0.011105
      0.012031
      -0.042989
      0.024615
      0.038627
    
    
      ATP
      0.005962
      -0.005962
      0.070499
      -0.010597
      0.027267
      -0.031674
      0.019861
      0.003360
      0.020404
      0.007578
      ...
      0.033470
      0.001965
      0.042061
      -0.031348
      -0.011105
      1.000000
      0.026136
      -0.046073
      0.074177
      -0.023717
    
    
      ATR
      -0.473204
      0.473204
      -0.219733
      0.323539
      -0.305942
      0.302355
      -0.327717
      0.554541
      -0.323569
      0.540404
      ...
      -0.054241
      -0.014041
      -0.054006
      0.035144
      0.012031
      0.026136
      1.000000
      0.205996
      0.166793
      0.332486
    
    
      ATAR
      -0.240212
      0.240212
      -0.112282
      0.106688
      -0.119271
      0.131967
      -0.200768
      0.247586
      -0.204948
      0.227474
      ...
      -0.053437
      -0.116419
      -0.033736
      -0.006556
      -0.042989
      -0.046073
      0.205996
      1.000000
      -0.755884
      -0.152354
    
    
      ATMR
      0.003324
      -0.003324
      0.001539
      0.010279
      -0.040202
      -0.009907
      0.018656
      0.010373
      0.029555
      0.025252
      ...
      0.013525
      0.083728
      0.024187
      0.008865
      0.024615
      0.074177
      0.166793
      -0.755884
      1.000000
      -0.280309
    
    
      ATDR
      -0.066883
      0.066883
      -0.029892
      0.115561
      -0.031136
      0.089040
      -0.018890
      0.108410
      -0.026147
      0.102752
      ...
      0.011634
      0.033848
      -0.036268
      0.028434
      0.038627
      -0.023717
      0.332486
      -0.152354
      -0.280309
      1.000000
    
  

28 rows × 28 columns



In [171]:

    
fig = plt.figure(figsize=(35,35))
ax = fig.add_subplot(111)
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)
ticks = np.arange(0,28,1)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names)
ax.set_yticklabels(names)
plt.show()



In [172]:

    
df = df_tmp
df['Home_Team_Factor'] = 1
df['Away_Team_Factor'] = -1
df.head()









    Out[172]:







  
    
      
      MId
      Home_Team
      Home_Team_Goals
      Away_Team_Goals
      Away_Team
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      ...
      HTAR
      HTMR
      HTDR
      ATP
      ATR
      ATAR
      ATMR
      ATDR
      Home_Team_Factor
      Away_Team_Factor
    
  
  
    
      0
      9601
      CAR
      1
      2
      CHE
      36.8
      63.2
      3
      7
      10
      ...
      151.256
      269.512
      354.638
      14
      883.673
      166.000
      315.499
      402.174
      1
      -1
    
    
      1
      9602
      FUL
      2
      2
      CRY
      66.1
      33.9
      5
      6
      15
      ...
      182.616
      210.218
      401.000
      14
      790.224
      144.288
      287.936
      358.000
      1
      -1
    
    
      2
      9603
      HUL
      0
      2
      EVE
      43
      57
      3
      4
      12
      ...
      216.189
      219.000
      357.000
      14
      862.968
      173.566
      293.402
      396.000
      1
      -1
    
    
      3
      9604
      LIV
      2
      1
      NUFC
      66.2
      33.8
      5
      2
      13
      ...
      269.651
      220.392
      394.000
      14
      827.956
      84.175
      294.260
      449.521
      1
      -1
    
    
      4
      9605
      MCFC
      2
      0
      WHU
      68.1
      31.9
      7
      0
      28
      ...
      155.313
      350.230
      407.000
      14
      808.489
      129.325
      315.164
      364.000
      1
      -1
    
  

5 rows × 35 columns



In [173]:

    
df['Winning_Team'] = 0
df.loc[df.Home_Team_Goals == df.Away_Team_Goals,'Winning_Team'] = 0
df.loc[df.Home_Team_Goals > df.Away_Team_Goals,'Winning_Team'] = 1
df.loc[df.Home_Team_Goals < df.Away_Team_Goals,'Winning_Team'] = 2
df.head()









    Out[173]:







  
    
      
      MId
      Home_Team
      Home_Team_Goals
      Away_Team_Goals
      Away_Team
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      ...
      HTMR
      HTDR
      ATP
      ATR
      ATAR
      ATMR
      ATDR
      Home_Team_Factor
      Away_Team_Factor
      Winning_Team
    
  
  
    
      0
      9601
      CAR
      1
      2
      CHE
      36.8
      63.2
      3
      7
      10
      ...
      269.512
      354.638
      14
      883.673
      166.000
      315.499
      402.174
      1
      -1
      2
    
    
      1
      9602
      FUL
      2
      2
      CRY
      66.1
      33.9
      5
      6
      15
      ...
      210.218
      401.000
      14
      790.224
      144.288
      287.936
      358.000
      1
      -1
      0
    
    
      2
      9603
      HUL
      0
      2
      EVE
      43
      57
      3
      4
      12
      ...
      219.000
      357.000
      14
      862.968
      173.566
      293.402
      396.000
      1
      -1
      2
    
    
      3
      9604
      LIV
      2
      1
      NUFC
      66.2
      33.8
      5
      2
      13
      ...
      220.392
      394.000
      14
      827.956
      84.175
      294.260
      449.521
      1
      -1
      1
    
    
      4
      9605
      MCFC
      2
      0
      WHU
      68.1
      31.9
      7
      0
      28
      ...
      350.230
      407.000
      14
      808.489
      129.325
      315.164
      364.000
      1
      -1
      1
    
  

5 rows × 36 columns



In [174]:

    
cols = ['MId', 'Home_Team', 'Home_Team_Goals', 'Away_Team_Goals','Away_Team', 'Home_Poss', 'Away_Poss', 'Home_ShotsT','Away_ShotsT', 'Home_Shots', 'Away_Shots', 'Home_Touches','Away_Touches', 'Home_Passes', 'Away_Passes', 'Home_Tackles','Away_Tackles', 'Home_Clearances', 'Away_Clearances','Home_Corners', 'Away_Corners', 'Home_Offsides', 'Away_Offsides','HTP','ATP', 'HTR','ATR', 'HTAR', 'ATAR', 'HTMR', 'ATMR', 'HTDR','ATDR', 'Home_Team_Factor', 'Away_Team_Factor','Winning_Team']
df_final = df[cols]
df_final.head()









    Out[174]:







  
    
      
      MId
      Home_Team
      Home_Team_Goals
      Away_Team_Goals
      Away_Team
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      ...
      ATR
      HTAR
      ATAR
      HTMR
      ATMR
      HTDR
      ATDR
      Home_Team_Factor
      Away_Team_Factor
      Winning_Team
    
  
  
    
      0
      9601
      CAR
      1
      2
      CHE
      36.8
      63.2
      3
      7
      10
      ...
      883.673
      151.256
      166.000
      269.512
      315.499
      354.638
      402.174
      1
      -1
      2
    
    
      1
      9602
      FUL
      2
      2
      CRY
      66.1
      33.9
      5
      6
      15
      ...
      790.224
      182.616
      144.288
      210.218
      287.936
      401.000
      358.000
      1
      -1
      0
    
    
      2
      9603
      HUL
      0
      2
      EVE
      43
      57
      3
      4
      12
      ...
      862.968
      216.189
      173.566
      219.000
      293.402
      357.000
      396.000
      1
      -1
      2
    
    
      3
      9604
      LIV
      2
      1
      NUFC
      66.2
      33.8
      5
      2
      13
      ...
      827.956
      269.651
      84.175
      220.392
      294.260
      394.000
      449.521
      1
      -1
      1
    
    
      4
      9605
      MCFC
      2
      0
      WHU
      68.1
      31.9
      7
      0
      28
      ...
      808.489
      155.313
      129.325
      350.230
      315.164
      407.000
      364.000
      1
      -1
      1
    
  

5 rows × 36 columns



In [175]:

    
df_final = df_final.loc[:,'Home_Poss':]
df_final.head()









    Out[175]:







  
    
      
      Home_Poss
      Away_Poss
      Home_ShotsT
      Away_ShotsT
      Home_Shots
      Away_Shots
      Home_Touches
      Away_Touches
      Home_Passes
      Away_Passes
      ...
      ATR
      HTAR
      ATAR
      HTMR
      ATMR
      HTDR
      ATDR
      Home_Team_Factor
      Away_Team_Factor
      Winning_Team
    
  
  
    
      0
      36.8
      63.2
      3
      7
      10
      28
      536
      801
      317
      568
      ...
      883.673
      151.256
      166.000
      269.512
      315.499
      354.638
      402.174
      1
      -1
      2
    
    
      1
      66.1
      33.9
      5
      6
      15
      15
      717
      463
      496
      249
      ...
      790.224
      182.616
      144.288
      210.218
      287.936
      401.000
      358.000
      1
      -1
      0
    
    
      2
      43
      57
      3
      4
      12
      11
      627
      790
      459
      623
      ...
      862.968
      216.189
      173.566
      219.000
      293.402
      357.000
      396.000
      1
      -1
      2
    
    
      3
      66.2
      33.8
      5
      2
      13
      8
      851
      515
      673
      333
      ...
      827.956
      269.651
      84.175
      220.392
      294.260
      394.000
      449.521
      1
      -1
      1
    
    
      4
      68.1
      31.9
      7
      0
      28
      3
      772
      431
      585
      259
      ...
      808.489
      155.313
      129.325
      350.230
      315.164
      407.000
      364.000
      1
      -1
      1
    
  

5 rows × 31 columns



In [176]:

    
df_fn = df_final.apply(lambda x : pd.to_numeric(x,errors='ignore'))
type(df_fn['Home_Poss'][0])









    Out[176]:





numpy.float64



In [177]:

    
#conversion of dataframe to a vector(Numpy Array)
arr_df = df_fn.values
arr_df.shape









    Out[177]:





(1136L, 31L)



In [178]:

    
y_train = arr_df[:,-1]
y_train
y_train.shape









    Out[178]:





(1136L,)

Removing the "Winning_Team Column" to prepare the X-Train Values



In [179]:

    
x_train = arr_df[:,:-1]
x_train.shape









    Out[179]:





(1136L, 30L)



In [180]:

    
x_htrain = x_train[:,0::2]
x_htrain.shape









    Out[180]:





(1136L, 15L)



In [181]:

    
x_atrain = x_train[:,1::2]
x_atrain.shape









    Out[181]:





(1136L, 15L)



In [182]:

    
x_atrain[0,-1]









    Out[182]:





-1.0



In [183]:

    
x_train = x_htrain - x_atrain
x_train









    Out[183]:





array([[ -26.4  ,   -4.   ,  -18.   , ...,  -45.987,  -47.536,    2.   ],
       [  32.2  ,   -1.   ,    0.   , ...,  -77.718,   43.   ,    2.   ],
       [ -14.   ,   -1.   ,    1.   , ...,  -74.402,  -39.   ,    2.   ],
       ..., 
       [ -12.   ,    3.   ,    9.   , ...,  -48.444,   -0.679,    2.   ],
       [  25.8  ,   -1.   ,    6.   , ...,   61.356,  -86.   ,    2.   ],
       [   0.   ,   -3.   ,    0.   , ...,  107.686,   -2.6  ,    2.   ]])



In [184]:

    
x_train.shape









    Out[184]:





(1136L, 15L)



In [185]:

    
#model = linear_model.LogisticRegression()
#model = svm.SVC()
#model = svm.SVC(kernel='poly',degree=2)
#model = tree.DecisionTreeRegressor()
#model = tree.DecisionTreeClassifier()
#model = linear_model.Lasso()
#model = GradientBoostingClassifier(n_estimators=100)
#model = AdaBoostClassifier(n_estimators=100)
#model = RandomForestClassifier(n_estimators=64)
model = GradientBoostingRegressor(n_estimators=100, max_depth=5)
#model = linear_model.BayesianRidge()



In [186]:

    
def showFeatureImportance(my_categories):
    fx_imp = pd.Series(model.feature_importances_, index=my_categories)
    fx_imp /= fx_imp.max()
    fx_imp.sort_values(inplace = True)
    fx_imp.plot(kind='barh')



In [187]:

    
def regressor(preds):
    preds[preds >= 1.5] = 2
    preds[preds < .50] = 0
    preds[np.logical_and(preds >= .50,preds < 1.5)] = 1
    return preds



In [188]:

    
categories = ['Poss', 'ShotsT', 'Shots','Touches', 'Passes', 'Tackles', 'Clearances','Corners', 'Offsides','TP', 'TR', 'TAR', 'TMR', 'TDR', 'Team_Factor']
accuracy=[]

for i in range(1):
    X_train, X_test, Y_train, Y_test = train_test_split(x_train, y_train)
    print X_test.shape
    results = model.fit(X_train, Y_train)
    preds = model.predict(X_test)
    print preds
    #test = preds >= 0.55
    #print test
    preds = regressor(preds)
    accuracy.append(np.mean(preds == Y_test))
    #accuracy.append(np.mean(predictions == Y_test))
    print "Finished iteration:", i

print preds
print Y_test
                         
print "The accuracy is", sum(accuracy)/len(accuracy)
showFeatureImportance(categories)









    



(284L, 15L)
[ 0.76608731  1.49126085  0.75174287  1.06566833  0.5451756   0.70221122
  0.52829409  1.19774765  0.87298389  1.10835672  1.57124031  0.87136169
  1.43175693  0.5258034   0.65108393  0.73965704  0.94635875  0.8199985
  0.78800885  0.68025874  0.87498364  1.04287752  0.87478274  1.00899931
  0.61800796  1.21261299  1.20950088  0.42681911  1.02757891  0.97464513
  0.80160711  1.22497369  0.52038084  1.30453793  0.94939718  1.11215318
  1.06596003  0.90236568  0.47209132  0.45022722  1.5229408   0.40977724
  1.70491014  0.10000167  1.19639522  1.92400156  1.24073998  1.51756003
  0.99431697  1.0435268   2.0293887   0.69824828  1.32086388  0.99059771
  1.6430958   1.67975711  0.82385751  0.79376597  1.50520227  1.20338328
  1.24193177  1.24378858  0.45463165  1.02151632  0.78345531  1.20237313
  0.83343272  0.91129186  1.18261148  1.28485933  0.90876107  0.95917518
  0.62885089  0.98621816  1.35657093  2.06507111  1.07465549  0.66345171
  1.15702829  1.17500513  1.54888148  1.85826448  0.80600894  1.29674896
  0.91242745  0.94474336  1.61536611  0.68765848  1.74049096  1.35219588
  0.87627833  0.73066713  1.20378712  1.04854997  1.35383421  1.6149289
  1.55931112  0.84400299  0.83746182  1.11162958  1.26625169  0.93051591
  0.50539952  0.33047751  1.21857456  0.77251237  0.89153517  1.53560457
  1.04133414  1.07515065  1.70534631  0.72788984  0.92880654  2.2102405
  1.33538484  1.53009035  0.80052191  1.24904339  1.03475765  1.17070699
  0.74567519  1.02038038  1.45564434  1.11176862  0.83476973  1.14159006
  1.77199082  0.64467899  0.80810547  0.88092881  0.14309056  0.63668433
  0.72639553  0.8783846   1.29049563  0.93940489  1.09684094  0.83219512
  1.1441547   0.87636485  0.88714224  1.11417683  1.72850761  1.07499167
  1.80717741  1.09064679  0.72316063  0.84202101  1.54059738  0.88725068
  1.02988991  1.42334613  1.16435383  0.78583015  1.43420534  1.38669987
  1.18318179  0.93711628  1.02954403  1.11379338  0.70383355  1.58199845
  1.24048591  1.08348201  1.38187068  1.1771024   1.18706456  0.93786787
  1.15975336  0.59951043  0.69455362  0.94962925  1.08328437  0.81917704
  0.41735312  1.16271498  0.77960896  1.57334255  0.57473721  0.98769059
  1.70826441  0.89870563  1.18292907  1.10566517  0.90423195  0.81839855
  1.09366282  1.09154091  0.92744663  0.88383853  1.04742584  1.93620496
  1.63678806  0.97324755  0.80922092  1.36368375  0.80387449  1.54968819
  0.85352921  1.01274861  1.81405336  0.8007874   0.5458123   0.95119479
  1.01652986  0.74083604  1.65238805  1.20058022  0.86646356  1.05977489
  0.88078821  0.90607845  1.81719398  1.2263609   0.81080757  1.15384744
  0.62697939  0.87343725  0.56375508  1.08578884  0.96586217  0.83152635
  0.48479554  1.61677584  0.89979213  0.78116275  0.50202148  1.36473769
  0.46483208  1.06401168  0.99994976  0.97743196  1.08058319  1.00086384
  0.66672725  0.88716163  0.90019061  1.53510156  0.88686851  0.63406319
  0.93388518  0.40717566  0.99474876  1.67451481  1.32586558  0.91968137
  1.08357073  1.33126159  1.38084288  0.89497689  0.80829044  1.03328592
  1.27421438  1.21842976  1.07102856  0.48009406  0.53132158  0.52076586
  1.47318426  0.91875686  0.91359561  1.47331788  1.38533875  1.16941258
  1.16230476  0.67152028  0.89621495  0.44583487  0.61259679  0.93775463
  0.61246713  1.44927928  0.98354218  0.78662796  1.19199425  0.52674288
  1.4476564   0.56591104  0.90380028  0.45441841  1.1307893   0.87959624
  0.73039104  1.11621119]
Finished iteration: 0
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  2.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  0.  0.  2.  0.  2.  0.  1.  2.  1.  2.  1.  1.  2.  1.  1.  1.
  2.  2.  1.  1.  2.  1.  1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  2.  1.  1.  1.  1.  2.  2.  1.  1.  1.  1.  2.  1.  2.  1.
  1.  1.  1.  1.  1.  2.  2.  1.  1.  1.  1.  1.  1.  0.  1.  1.  1.  2.
  1.  1.  2.  1.  1.  2.  1.  2.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  2.  1.  1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  2.  1.
  2.  1.  1.  1.  2.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  2.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.  2.  1.  1.
  2.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  2.  2.  1.  1.  1.  1.  2.
  1.  1.  2.  1.  1.  1.  1.  1.  2.  1.  1.  1.  1.  1.  2.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  0.  2.  1.  1.  1.  1.  0.  1.  1.  1.  1.  1.
  1.  1.  1.  2.  1.  1.  1.  0.  1.  2.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  1.  1.  1.  1.]
[ 2.  2.  2.  1.  2.  1.  1.  1.  1.  2.  2.  1.  1.  0.  0.  1.  1.  0.
  1.  1.  0.  1.  0.  1.  0.  2.  2.  1.  0.  0.  1.  2.  2.  1.  1.  2.
  0.  1.  1.  1.  2.  1.  2.  1.  0.  2.  1.  2.  1.  1.  2.  1.  1.  1.
  0.  0.  1.  1.  2.  0.  0.  1.  1.  1.  1.  1.  2.  1.  2.  1.  1.  0.
  1.  1.  2.  2.  1.  1.  2.  1.  2.  2.  1.  2.  1.  0.  2.  1.  2.  2.
  0.  2.  1.  0.  0.  2.  2.  2.  1.  2.  0.  0.  1.  1.  2.  1.  1.  0.
  2.  1.  2.  2.  1.  2.  2.  2.  1.  2.  0.  0.  2.  1.  0.  1.  1.  0.
  2.  1.  1.  1.  1.  1.  0.  1.  2.  1.  1.  1.  1.  1.  0.  2.  2.  1.
  2.  1.  1.  0.  2.  1.  1.  2.  1.  2.  1.  0.  0.  2.  1.  2.  1.  0.
  0.  0.  2.  1.  0.  2.  2.  1.  1.  0.  1.  2.  1.  1.  0.  2.  2.  0.
  2.  1.  2.  1.  1.  1.  1.  2.  0.  1.  1.  1.  0.  1.  1.  2.  0.  0.
  0.  1.  2.  1.  0.  1.  1.  0.  0.  2.  1.  2.  0.  0.  2.  1.  2.  0.
  1.  0.  0.  1.  1.  1.  0.  2.  0.  2.  0.  0.  1.  0.  2.  1.  1.  1.
  2.  0.  1.  2.  1.  1.  1.  1.  1.  2.  1.  2.  2.  2.  1.  2.  0.  1.
  2.  2.  2.  0.  1.  0.  1.  1.  0.  2.  2.  2.  0.  2.  0.  1.  0.  1.
  0.  2.  2.  1.  1.  1.  2.  0.  1.  1.  2.  1.  1.  0.]
The accuracy is 0.510563380282

	MId	Home_Team	Home_Team_Goals	Away_Team_Goals	Away_Team	Home_Poss	Away_Poss	Home_ShotsT	Away_ShotsT	Home_Shots	...	HTP	HTR	HTAR	HTMR	HTDR	ATP	ATR	ATAR	ATMR	ATDR
0	9601	CAR	1	2	CHE	36.8	63.2	3	7	10	...	14	775.406	151.256	269.512	354.638	14	883.673	166.000	315.499	402.174
1	9602	FUL	2	2	CRY	66.1	33.9	5	6	15	...	14	793.834	182.616	210.218	401.000	14	790.224	144.288	287.936	358.000
2	9603	HUL	0	2	EVE	43	57	3	4	12	...	14	792.189	216.189	219.000	357.000	14	862.968	173.566	293.402	396.000
3	9604	LIV	2	1	NUFC	66.2	33.8	5	2	13	...	14	884.043	269.651	220.392	394.000	14	827.956	84.175	294.260	449.521
4	9605	MCFC	2	0	WHU	68.1	31.9	7	0	28	...	14	912.543	155.313	350.230	407.000	14	808.489	129.325	315.164	364.000
5	9606	NOR	0	2	ARS	37.7	62.3	5	8	11	...	14	805.636	93.648	341.988	370.000	14	869.152	230.624	252.528	386.000
6	9607	SOT	1	1	MUFC	58.5	41.5	6	2	15	...	14	830.718	157.239	296.034	377.445	14	883.726	251.335	229.391	403.000
7	9608	SUN	1	3	SWAN	53.3	46.7	4	4	20	...	14	798.745	76.958	361.257	360.530	14	795.936	278.427	163.175	354.334
8	9609	TOT	3	0	AVL	54.7	45.3	6	1	12	...	14	841.663	148.000	291.936	401.727	13	800.032	181.180	183.000	435.852
9	9610	WBA	1	2	STK	43	57	4	4	18	...	14	810.201	166.403	349.500	294.298	14	793.845	85.944	349.901	358.000

	Home_Poss	Away_Poss	Home_ShotsT	Away_ShotsT	Home_Shots	Away_Shots	Home_Touches	Away_Touches	Home_Passes	Away_Passes	...	HTP	HTR	HTAR	HTMR	HTDR	ATP	ATR	ATAR	ATMR	ATDR
Home_Poss	1.000000	-1.000000	0.316120	-0.315443	0.524575	-0.475808	0.895654	-0.897972	0.877316	-0.884429	...	0.025225	0.464898	0.190234	0.042298	0.065889	0.005962	-0.473204	-0.240212	0.003324	-0.066883
Away_Poss	-1.000000	1.000000	-0.316120	0.315443	-0.524575	0.475808	-0.895654	0.897972	-0.877316	0.884429	...	-0.025225	-0.464898	-0.190234	-0.042298	-0.065889	-0.005962	0.473204	0.240212	-0.003324	0.066883
Home_ShotsT	0.316120	-0.316120	1.000000	-0.173285	0.665978	-0.250153	0.332443	-0.279791	0.315805	-0.266711	...	0.053084	0.330653	0.133486	0.005930	0.086834	0.070499	-0.219733	-0.112282	0.001539	-0.029892
Away_ShotsT	-0.315443	0.315443	-0.173285	1.000000	-0.229451	0.630199	-0.277174	0.326558	-0.284377	0.299902	...	0.027838	-0.195858	-0.073407	-0.008440	-0.052068	-0.010597	0.323539	0.106688	0.010279	0.115561
Home_Shots	0.524575	-0.524575	0.665978	-0.229451	1.000000	-0.354441	0.461211	-0.493154	0.419769	-0.493644	...	-0.014093	0.309978	0.139142	0.010991	0.052610	0.027267	-0.305942	-0.119271	-0.040202	-0.031136
Away_Shots	-0.475808	0.475808	-0.250153	0.630199	-0.354441	1.000000	-0.436438	0.438792	-0.443691	0.396951	...	0.006007	-0.274016	-0.086549	-0.057355	-0.025990	-0.031674	0.302355	0.131967	-0.009907	0.089040
Home_Touches	0.895654	-0.895654	0.332443	-0.277174	0.461211	-0.436438	1.000000	-0.646915	0.980318	-0.625277	...	0.038890	0.558134	0.213400	0.029445	0.133905	0.019861	-0.327717	-0.200768	0.018656	-0.018890
Away_Touches	-0.897972	0.897972	-0.279791	0.326558	-0.493154	0.438792	-0.646915	1.000000	-0.625229	0.977526	...	-0.004221	-0.320504	-0.153847	-0.040757	0.005563	0.003360	0.554541	0.247586	0.010373	0.108410
Home_Passes	0.877316	-0.877316	0.315805	-0.284377	0.419769	-0.443691	0.980318	-0.625229	1.000000	-0.577590	...	0.052318	0.561553	0.199638	0.044998	0.132922	0.020404	-0.323569	-0.204948	0.029555	-0.026147
Away_Passes	-0.884429	0.884429	-0.266711	0.299902	-0.493644	0.396951	-0.625277	0.977526	-0.577590	1.000000	...	0.001624	-0.295116	-0.156138	-0.025143	0.007073	0.007578	0.540404	0.227474	0.025252	0.102752
Home_Tackles	-0.172459	0.172459	-0.012261	0.070435	-0.085394	0.047551	-0.074807	0.246762	-0.134380	0.157641	...	0.013462	0.077214	0.033283	-0.007918	0.031578	-0.013654	0.200828	0.035920	0.054348	0.041557
Away_Tackles	0.169691	-0.169691	-0.019757	-0.001110	0.003944	-0.107036	0.232605	-0.079300	0.139685	-0.147408	...	-0.014019	0.164441	0.003048	0.047440	0.066700	0.011347	0.052493	-0.009029	0.029952	0.014225
Home_Clearances	-0.277403	0.277403	-0.095408	0.022782	-0.188163	0.262887	-0.269608	0.143354	-0.346759	0.095620	...	-0.045484	-0.197748	0.033539	-0.135742	-0.012801	-0.020916	-0.002795	0.084873	-0.095043	0.018308
Away_Clearances	0.214161	-0.214161	-0.010117	-0.027719	0.215589	-0.118271	0.073087	-0.200855	0.010092	-0.306799	...	-0.036738	-0.167104	0.043588	-0.081123	-0.084927	-0.008852	-0.107777	0.045625	-0.110708	0.008021
Home_Corners	0.441836	-0.441836	0.267085	-0.183954	0.495146	-0.317978	0.335527	-0.414290	0.295668	-0.436616	...	-0.027674	0.198027	0.057102	0.023322	0.054729	-0.027624	-0.228946	-0.071335	-0.037770	-0.038928
Away_Corners	-0.362764	0.362764	-0.163933	0.244439	-0.261911	0.498755	-0.354564	0.267462	-0.374861	0.228546	...	0.015213	-0.132627	-0.026517	-0.062478	0.018675	-0.046770	0.210584	0.087795	0.029492	0.009556
Home_Offsides	-0.017331	0.017331	0.026309	-0.024810	-0.018583	-0.049601	-0.004910	0.002334	-0.020296	0.006835	...	0.058818	0.044848	0.033794	-0.006883	0.000802	-0.007569	0.059502	-0.037806	0.022949	0.077124
Away_Offsides	-0.016569	0.016569	0.011807	0.038690	-0.009970	-0.032591	-0.027856	0.016967	-0.027339	-0.002533	...	-0.006855	-0.016913	-0.005802	-0.032585	0.043951	0.009871	0.051386	0.031946	0.001091	-0.004269
HTP	0.025225	-0.025225	0.053084	0.027838	-0.014093	0.006007	0.038890	-0.004221	0.052318	0.001624	...	1.000000	0.056394	-0.062121	0.083333	0.011258	0.033470	-0.054241	-0.053437	0.013525	0.011634
HTR	0.464898	-0.464898	0.330653	-0.195858	0.309978	-0.274016	0.558134	-0.320504	0.561553	-0.295116	...	0.056394	1.000000	0.204030	0.143205	0.359908	0.001965	-0.014041	-0.116419	0.083728	0.033848
HTAR	0.190234	-0.190234	0.133486	-0.073407	0.139142	-0.086549	0.213400	-0.153847	0.199638	-0.156138	...	-0.062121	0.204030	1.000000	-0.748693	-0.120574	0.042061	-0.054006	-0.033736	0.024187	-0.036268
HTMR	0.042298	-0.042298	0.005930	-0.008440	0.010991	-0.057355	0.029445	-0.040757	0.044998	-0.025143	...	0.083333	0.143205	-0.748693	1.000000	-0.328857	-0.031348	0.035144	-0.006556	0.008865	0.028434
HTDR	0.065889	-0.065889	0.086834	-0.052068	0.052610	-0.025990	0.133905	0.005563	0.132922	0.007073	...	0.011258	0.359908	-0.120574	-0.328857	1.000000	-0.011105	0.012031	-0.042989	0.024615	0.038627
ATP	0.005962	-0.005962	0.070499	-0.010597	0.027267	-0.031674	0.019861	0.003360	0.020404	0.007578	...	0.033470	0.001965	0.042061	-0.031348	-0.011105	1.000000	0.026136	-0.046073	0.074177	-0.023717
ATR	-0.473204	0.473204	-0.219733	0.323539	-0.305942	0.302355	-0.327717	0.554541	-0.323569	0.540404	...	-0.054241	-0.014041	-0.054006	0.035144	0.012031	0.026136	1.000000	0.205996	0.166793	0.332486
ATAR	-0.240212	0.240212	-0.112282	0.106688	-0.119271	0.131967	-0.200768	0.247586	-0.204948	0.227474	...	-0.053437	-0.116419	-0.033736	-0.006556	-0.042989	-0.046073	0.205996	1.000000	-0.755884	-0.152354
ATMR	0.003324	-0.003324	0.001539	0.010279	-0.040202	-0.009907	0.018656	0.010373	0.029555	0.025252	...	0.013525	0.083728	0.024187	0.008865	0.024615	0.074177	0.166793	-0.755884	1.000000	-0.280309
ATDR	-0.066883	0.066883	-0.029892	0.115561	-0.031136	0.089040	-0.018890	0.108410	-0.026147	0.102752	...	0.011634	0.033848	-0.036268	0.028434	0.038627	-0.023717	0.332486	-0.152354	-0.280309	1.000000