Predicting the Outcome of Cricket Matches



In [2]:

    
%matplotlib inline 
import numpy as np # imports a fast numerical programming library
import pandas as pd #lets us handle data as dataframes
# Configure how pandas renders tables in the notebook: a wide console,
# up to 100 visible columns, and HTML (rich) DataFrame output.
for option_name, option_value in (
    ('display.width', 500),
    ('display.max_columns', 100),
    ('display.notebook_repr_html', True),
):
    pd.set_option(option_name, option_value)
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from patsy import dmatrices



In [3]:

    
# Load the per-match feature table; the first CSV column is used as the row index.
matches = pd.read_csv(
    "../data/matcheswithfeatures.csv",
    index_col=0,
)



In [4]:

    
# Build the model inputs from a patsy formula: y receives the team1Winning
# target and X the five feature columns listed on the right-hand side.
# The leading `0 +` keeps an intercept column out of X (the displayed X
# below has exactly the five feature columns).
y, X = dmatrices('team1Winning ~ 0 + Avg_SR_Difference + Avg_WPR_Difference + Total_MVP_Difference + Prev_Enc_Team1_WinPerc + \
                  Total_RF_Difference', matches, return_type="dataframe")
# Flatten the target frame into a 1-D array.
y_arr = np.ravel(y)



In [5]:

    
# Display the feature matrix for visual inspection.
X









    Out[5]:







  
    
      
      Avg_SR_Difference
      Avg_WPR_Difference
      Total_MVP_Difference
      Prev_Enc_Team1_WinPerc
      Total_RF_Difference
    
  
  
    
      5
      55.665975
      1.414786
      0.0
      0.000000
      0.000000
    
    
      7
      6.135734
      -1.591368
      1.0
      0.000000
      100.000000
    
    
      9
      4.666844
      0.111379
      0.0
      0.000000
      0.000000
    
    
      10
      25.388743
      -0.021123
      0.0
      0.000000
      0.000000
    
    
      11
      -28.438618
      11.723738
      0.0
      0.000000
      0.000000
    
    
      13
      41.221731
      6.066625
      0.0
      0.000000
      66.666667
    
    
      14
      37.233069
      0.581470
      2.0
      0.000000
      66.666667
    
    
      15
      -13.582248
      1.010938
      1.0
      0.000000
      50.000000
    
    
      16
      15.293648
      2.058102
      -1.0
      0.000000
      16.666667
    
    
      18
      40.069300
      -2.720529
      2.0
      0.000000
      33.333333
    
    
      19
      -6.529304
      0.155223
      0.0
      0.000000
      33.333333
    
    
      21
      60.276090
      1.503388
      1.0
      0.000000
      66.666667
    
    
      22
      -36.759577
      -0.140660
      -1.0
      0.000000
      -33.333333
    
    
      23
      -3.740886
      -1.545548
      -2.0
      0.000000
      -33.333333
    
    
      24
      -29.919482
      -1.732469
      -1.0
      0.000000
      -100.000000
    
    
      26
      -1.150869
      1.681456
      2.0
      0.000000
      33.333333
    
    
      27
      4.157345
      0.525677
      1.0
      100.000000
      33.333333
    
    
      28
      -11.720957
      2.154708
      -1.0
      100.000000
      0.000000
    
    
      30
      -12.795080
      -3.123743
      -2.0
      100.000000
      -100.000000
    
    
      31
      15.090854
      -2.420466
      0.0
      100.000000
      -66.666667
    
    
      33
      16.965741
      0.337565
      1.0
      100.000000
      -33.333333
    
    
      34
      -39.254979
      -1.533140
      -1.0
      0.000000
      -66.666667
    
    
      35
      -30.450448
      -0.062459
      -2.0
      0.000000
      33.333333
    
    
      36
      9.397768
      1.087380
      0.0
      100.000000
      -33.333333
    
    
      37
      5.965139
      0.737928
      0.0
      0.000000
      0.000000
    
    
      39
      -20.297425
      10.328566
      -2.0
      0.000000
      -33.333333
    
    
      41
      -1.229378
      2.296430
      5.0
      100.000000
      33.333333
    
    
      44
      -19.002114
      -1.367688
      -3.0
      0.000000
      0.000000
    
    
      45
      -9.501326
      -1.699759
      -5.0
      0.000000
      0.000000
    
    
      46
      17.779892
      1.135235
      -2.0
      100.000000
      -33.333333
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      474
      -6.548623
      0.610484
      -4.0
      53.333333
      -100.000000
    
    
      475
      -11.068650
      -0.136905
      -22.0
      46.153846
      -33.333333
    
    
      478
      7.048118
      -1.246590
      22.0
      56.250000
      -16.666667
    
    
      479
      -0.596885
      0.020515
      -11.0
      50.000000
      0.000000
    
    
      480
      17.723755
      0.354974
      3.0
      53.846154
      66.666667
    
    
      482
      -5.061380
      -1.212153
      7.0
      53.846154
      33.333333
    
    
      483
      -5.004302
      0.906597
      -4.0
      41.666667
      33.333333
    
    
      485
      2.012543
      0.366985
      21.0
      57.142857
      66.666667
    
    
      487
      2.910151
      0.425559
      7.0
      61.538462
      -33.333333
    
    
      488
      14.275080
      0.459693
      15.0
      61.538462
      -33.333333
    
    
      489
      -13.536521
      0.097342
      5.0
      60.000000
      -33.333333
    
    
      490
      4.510188
      -0.570758
      7.0
      57.142857
      0.000000
    
    
      492
      -13.615487
      -0.737024
      -1.0
      46.666667
      66.666667
    
    
      493
      10.775508
      -0.057722
      7.0
      60.000000
      0.000000
    
    
      494
      4.569589
      -0.231439
      25.0
      58.823529
      0.000000
    
    
      496
      -18.726971
      -0.755737
      -14.0
      53.333333
      -33.333333
    
    
      497
      -5.574733
      0.886535
      5.0
      35.714286
      66.666667
    
    
      499
      5.014283
      1.972348
      17.0
      52.631579
      0.000000
    
    
      500
      6.415078
      0.254813
      -18.0
      40.000000
      -33.333333
    
    
      502
      -1.343308
      2.062729
      -3.0
      43.750000
      -33.333333
    
    
      503
      -5.738591
      -0.046456
      16.0
      62.500000
      33.333333
    
    
      505
      6.941454
      1.678318
      28.0
      73.333333
      33.333333
    
    
      506
      5.622383
      -1.324729
      -16.0
      60.000000
      -66.666667
    
    
      507
      -0.677689
      -0.313345
      -11.0
      66.666667
      33.333333
    
    
      509
      -0.716536
      1.824407
      -33.0
      42.857143
      0.000000
    
    
      510
      -3.303823
      -0.271935
      -16.0
      50.000000
      0.000000
    
    
      513
      6.315981
      -0.617777
      -24.0
      50.000000
      0.000000
    
    
      514
      -2.200375
      0.969143
      5.0
      50.000000
      0.000000
    
    
      515
      -0.521025
      1.039181
      -23.0
      38.888889
      33.333333
    
    
      516
      -1.575550
      -1.707931
      -24.0
      52.380952
      0.000000
    
  

327 rows × 5 columns

Splitting the data by season into a training set (2008-2013) and a test set (2013-2015)



In [6]:

    
# Chronological hold-out: rows whose index is below 398 form the training
# set; rows with index 398 and above form the test set.
X_timetrain, X_timetest = X.loc[X.index < 398], X.loc[X.index >= 398]
Y_timetrain, Y_timetest = y.loc[y.index < 398], y.loc[y.index >= 398]
# Flattened 1-D copies of the targets, used as classifier labels.
Y_timetrain_arr, Y_timetest_arr = np.ravel(Y_timetrain), np.ravel(Y_timetest)



In [7]:

    
# Best values of k in time-based split data
knn1 = KNeighborsClassifier(n_neighbors = 31)
knn1.fit(X_timetrain, Y_timetrain_arr)
y_pred = knn1.predict(X_timetest)
print "Accuracy is ", metrics.accuracy_score(Y_timetest_arr, y_pred)*100, "%"









    



Accuracy is  64.367816092 %



In [8]:

    
# Display the held-out feature rows (index 398 and above).
X_timetest









    Out[8]:







  
    
      
      Avg_SR_Difference
      Avg_WPR_Difference
      Total_MVP_Difference
      Prev_Enc_Team1_WinPerc
      Total_RF_Difference
    
  
  
    
      398
      -9.646646
      0.466526
      6.0
      16.666667
      0.000000
    
    
      399
      4.963605
      0.097800
      12.0
      50.000000
      0.000000
    
    
      400
      7.079810
      0.432566
      11.0
      70.000000
      0.000000
    
    
      402
      21.485599
      1.176414
      17.0
      53.846154
      -100.000000
    
    
      403
      -4.503334
      1.663169
      15.0
      54.545455
      100.000000
    
    
      404
      -7.297630
      -0.332117
      -1.0
      72.727273
      -100.000000
    
    
      405
      12.183316
      2.316918
      5.0
      66.666667
      -50.000000
    
    
      407
      -5.341707
      2.620287
      12.0
      61.538462
      50.000000
    
    
      408
      5.093091
      0.588349
      20.0
      54.545455
      -50.000000
    
    
      410
      -13.668459
      -2.328697
      0.0
      60.000000
      -66.666667
    
    
      411
      15.451031
      0.903107
      -2.0
      54.545455
      66.666667
    
    
      412
      16.852669
      -1.198669
      -19.0
      50.000000
      33.333333
    
    
      413
      -6.674135
      2.225351
      8.0
      50.000000
      -33.333333
    
    
      415
      -19.344665
      -2.002792
      4.0
      41.666667
      -66.666667
    
    
      418
      12.797760
      0.233431
      -7.0
      70.000000
      66.666667
    
    
      419
      7.351864
      4.156536
      -1.0
      58.333333
      100.000000
    
    
      420
      5.659837
      -2.136710
      4.0
      50.000000
      33.333333
    
    
      422
      7.195161
      -0.935038
      -8.0
      45.454545
      33.333333
    
    
      423
      -12.464962
      -2.197910
      -7.0
      30.769231
      -66.666667
    
    
      424
      -3.504686
      0.247855
      -1.0
      50.000000
      33.333333
    
    
      425
      5.508879
      -1.167825
      -9.0
      50.000000
      33.333333
    
    
      426
      8.303271
      1.050310
      -14.0
      36.363636
      -33.333333
    
    
      428
      35.077496
      1.280748
      -1.0
      61.538462
      66.666667
    
    
      430
      -7.544654
      0.752493
      -8.0
      56.250000
      0.000000
    
    
      431
      21.871322
      0.886981
      -11.0
      54.545455
      33.333333
    
    
      432
      0.003466
      -1.131751
      3.0
      50.000000
      -100.000000
    
    
      434
      11.859779
      1.592142
      -11.0
      35.714286
      33.333333
    
    
      435
      2.600434
      1.780807
      5.0
      54.545455
      0.000000
    
    
      437
      -8.157091
      -0.853403
      -7.0
      76.923077
      0.000000
    
    
      438
      13.456476
      2.440403
      -4.0
      53.846154
      66.666667
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      474
      -6.548623
      0.610484
      -4.0
      53.333333
      -100.000000
    
    
      475
      -11.068650
      -0.136905
      -22.0
      46.153846
      -33.333333
    
    
      478
      7.048118
      -1.246590
      22.0
      56.250000
      -16.666667
    
    
      479
      -0.596885
      0.020515
      -11.0
      50.000000
      0.000000
    
    
      480
      17.723755
      0.354974
      3.0
      53.846154
      66.666667
    
    
      482
      -5.061380
      -1.212153
      7.0
      53.846154
      33.333333
    
    
      483
      -5.004302
      0.906597
      -4.0
      41.666667
      33.333333
    
    
      485
      2.012543
      0.366985
      21.0
      57.142857
      66.666667
    
    
      487
      2.910151
      0.425559
      7.0
      61.538462
      -33.333333
    
    
      488
      14.275080
      0.459693
      15.0
      61.538462
      -33.333333
    
    
      489
      -13.536521
      0.097342
      5.0
      60.000000
      -33.333333
    
    
      490
      4.510188
      -0.570758
      7.0
      57.142857
      0.000000
    
    
      492
      -13.615487
      -0.737024
      -1.0
      46.666667
      66.666667
    
    
      493
      10.775508
      -0.057722
      7.0
      60.000000
      0.000000
    
    
      494
      4.569589
      -0.231439
      25.0
      58.823529
      0.000000
    
    
      496
      -18.726971
      -0.755737
      -14.0
      53.333333
      -33.333333
    
    
      497
      -5.574733
      0.886535
      5.0
      35.714286
      66.666667
    
    
      499
      5.014283
      1.972348
      17.0
      52.631579
      0.000000
    
    
      500
      6.415078
      0.254813
      -18.0
      40.000000
      -33.333333
    
    
      502
      -1.343308
      2.062729
      -3.0
      43.750000
      -33.333333
    
    
      503
      -5.738591
      -0.046456
      16.0
      62.500000
      33.333333
    
    
      505
      6.941454
      1.678318
      28.0
      73.333333
      33.333333
    
    
      506
      5.622383
      -1.324729
      -16.0
      60.000000
      -66.666667
    
    
      507
      -0.677689
      -0.313345
      -11.0
      66.666667
      33.333333
    
    
      509
      -0.716536
      1.824407
      -33.0
      42.857143
      0.000000
    
    
      510
      -3.303823
      -0.271935
      -16.0
      50.000000
      0.000000
    
    
      513
      6.315981
      -0.617777
      -24.0
      50.000000
      0.000000
    
    
      514
      -2.200375
      0.969143
      5.0
      50.000000
      0.000000
    
    
      515
      -0.521025
      1.039181
      -23.0
      38.888889
      33.333333
    
    
      516
      -1.575550
      -1.707931
      -24.0
      52.380952
      0.000000
    
  

87 rows × 5 columns



In [21]:

    
def getPrediction(match_id):
    '''Predict the winner of a held-out match with the fitted KNN model.

    Args:
        match_id (int): Value of the ``id`` column in ``matches``. Only ids
            from 399 to 517 (inclusive) are accepted.

    Returns:
        dict or None: ``{'name': <predicted winner>, 'prob': <probability of
        that outcome, in percent>}``. ``None`` is returned when the id is out
        of range, is not present in ``matches``, or has no feature row in
        ``X_timetest``.
    '''
    # Explicit range check rather than ``assert``: assertions are removed
    # when Python runs with -O, which would silently disable the validation.
    if not (399 <= match_id <= 517):
        return None

    match_row = matches.loc[matches['id'] == match_id]
    # Feature rows are stored under index ``match_id - 1``.
    toPredict = X_timetest.loc[X_timetest.index == match_id - 1].values

    # The index has gaps (not every id has a row), so an in-range id can
    # still select nothing. Bail out instead of failing on ``[0]`` below or
    # handing the model an empty array.
    if len(match_row) == 0 or len(toPredict) == 0:
        return None

    team1name = match_row.team1.unique()[0]
    team2name = match_row.team2.unique()[0]

    prediction_prob = knn1.predict_proba(toPredict)
    prediction = knn1.predict(toPredict)

    results = {}
    # NOTE(review): column 1 of predict_proba is taken as "team1 wins" and
    # column 0 as "team2 wins" -- confirm this matches knn1.classes_.
    if prediction[0] > 0:
        results['name'] = str(team1name)
        results['prob'] = float(prediction_prob[0][1])*100
    else:
        results['name'] = str(team2name)
        results['prob'] = float(prediction_prob[0][0])*100
    return results



In [23]:

    
# 617 is outside the accepted 399-517 id range, so this prints None.
print getPrediction(617)









    



None

	Avg_SR_Difference	Avg_WPR_Difference	Total_MVP_Difference	Prev_Enc_Team1_WinPerc	Total_RF_Difference
5	55.665975	1.414786	0.0	0.000000	0.000000
7	6.135734	-1.591368	1.0	0.000000	100.000000
9	4.666844	0.111379	0.0	0.000000	0.000000
10	25.388743	-0.021123	0.0	0.000000	0.000000
11	-28.438618	11.723738	0.0	0.000000	0.000000
13	41.221731	6.066625	0.0	0.000000	66.666667
14	37.233069	0.581470	2.0	0.000000	66.666667
15	-13.582248	1.010938	1.0	0.000000	50.000000
16	15.293648	2.058102	-1.0	0.000000	16.666667
18	40.069300	-2.720529	2.0	0.000000	33.333333
19	-6.529304	0.155223	0.0	0.000000	33.333333
21	60.276090	1.503388	1.0	0.000000	66.666667
22	-36.759577	-0.140660	-1.0	0.000000	-33.333333
23	-3.740886	-1.545548	-2.0	0.000000	-33.333333
24	-29.919482	-1.732469	-1.0	0.000000	-100.000000
26	-1.150869	1.681456	2.0	0.000000	33.333333
27	4.157345	0.525677	1.0	100.000000	33.333333
28	-11.720957	2.154708	-1.0	100.000000	0.000000
30	-12.795080	-3.123743	-2.0	100.000000	-100.000000
31	15.090854	-2.420466	0.0	100.000000	-66.666667
33	16.965741	0.337565	1.0	100.000000	-33.333333
34	-39.254979	-1.533140	-1.0	0.000000	-66.666667
35	-30.450448	-0.062459	-2.0	0.000000	33.333333
36	9.397768	1.087380	0.0	100.000000	-33.333333
37	5.965139	0.737928	0.0	0.000000	0.000000
39	-20.297425	10.328566	-2.0	0.000000	-33.333333
41	-1.229378	2.296430	5.0	100.000000	33.333333
44	-19.002114	-1.367688	-3.0	0.000000	0.000000
45	-9.501326	-1.699759	-5.0	0.000000	0.000000
46	17.779892	1.135235	-2.0	100.000000	-33.333333
...	...	...	...	...	...
474	-6.548623	0.610484	-4.0	53.333333	-100.000000
475	-11.068650	-0.136905	-22.0	46.153846	-33.333333
478	7.048118	-1.246590	22.0	56.250000	-16.666667
479	-0.596885	0.020515	-11.0	50.000000	0.000000
480	17.723755	0.354974	3.0	53.846154	66.666667
482	-5.061380	-1.212153	7.0	53.846154	33.333333
483	-5.004302	0.906597	-4.0	41.666667	33.333333
485	2.012543	0.366985	21.0	57.142857	66.666667
487	2.910151	0.425559	7.0	61.538462	-33.333333
488	14.275080	0.459693	15.0	61.538462	-33.333333
489	-13.536521	0.097342	5.0	60.000000	-33.333333
490	4.510188	-0.570758	7.0	57.142857	0.000000
492	-13.615487	-0.737024	-1.0	46.666667	66.666667
493	10.775508	-0.057722	7.0	60.000000	0.000000
494	4.569589	-0.231439	25.0	58.823529	0.000000
496	-18.726971	-0.755737	-14.0	53.333333	-33.333333
497	-5.574733	0.886535	5.0	35.714286	66.666667
499	5.014283	1.972348	17.0	52.631579	0.000000
500	6.415078	0.254813	-18.0	40.000000	-33.333333
502	-1.343308	2.062729	-3.0	43.750000	-33.333333
503	-5.738591	-0.046456	16.0	62.500000	33.333333
505	6.941454	1.678318	28.0	73.333333	33.333333
506	5.622383	-1.324729	-16.0	60.000000	-66.666667
507	-0.677689	-0.313345	-11.0	66.666667	33.333333
509	-0.716536	1.824407	-33.0	42.857143	0.000000
510	-3.303823	-0.271935	-16.0	50.000000	0.000000
513	6.315981	-0.617777	-24.0	50.000000	0.000000
514	-2.200375	0.969143	5.0	50.000000	0.000000
515	-0.521025	1.039181	-23.0	38.888889	33.333333
516	-1.575550	-1.707931	-24.0	52.380952	0.000000

	Avg_SR_Difference	Avg_WPR_Difference	Total_MVP_Difference	Prev_Enc_Team1_WinPerc	Total_RF_Difference
398	-9.646646	0.466526	6.0	16.666667	0.000000
399	4.963605	0.097800	12.0	50.000000	0.000000
400	7.079810	0.432566	11.0	70.000000	0.000000
402	21.485599	1.176414	17.0	53.846154	-100.000000
403	-4.503334	1.663169	15.0	54.545455	100.000000
404	-7.297630	-0.332117	-1.0	72.727273	-100.000000
405	12.183316	2.316918	5.0	66.666667	-50.000000
407	-5.341707	2.620287	12.0	61.538462	50.000000
408	5.093091	0.588349	20.0	54.545455	-50.000000
410	-13.668459	-2.328697	0.0	60.000000	-66.666667
411	15.451031	0.903107	-2.0	54.545455	66.666667
412	16.852669	-1.198669	-19.0	50.000000	33.333333
413	-6.674135	2.225351	8.0	50.000000	-33.333333
415	-19.344665	-2.002792	4.0	41.666667	-66.666667
418	12.797760	0.233431	-7.0	70.000000	66.666667
419	7.351864	4.156536	-1.0	58.333333	100.000000
420	5.659837	-2.136710	4.0	50.000000	33.333333
422	7.195161	-0.935038	-8.0	45.454545	33.333333
423	-12.464962	-2.197910	-7.0	30.769231	-66.666667
424	-3.504686	0.247855	-1.0	50.000000	33.333333
425	5.508879	-1.167825	-9.0	50.000000	33.333333
426	8.303271	1.050310	-14.0	36.363636	-33.333333
428	35.077496	1.280748	-1.0	61.538462	66.666667
430	-7.544654	0.752493	-8.0	56.250000	0.000000
431	21.871322	0.886981	-11.0	54.545455	33.333333
432	0.003466	-1.131751	3.0	50.000000	-100.000000
434	11.859779	1.592142	-11.0	35.714286	33.333333
435	2.600434	1.780807	5.0	54.545455	0.000000
437	-8.157091	-0.853403	-7.0	76.923077	0.000000
438	13.456476	2.440403	-4.0	53.846154	66.666667
...	...	...	...	...	...
474	-6.548623	0.610484	-4.0	53.333333	-100.000000
475	-11.068650	-0.136905	-22.0	46.153846	-33.333333
478	7.048118	-1.246590	22.0	56.250000	-16.666667
479	-0.596885	0.020515	-11.0	50.000000	0.000000
480	17.723755	0.354974	3.0	53.846154	66.666667
482	-5.061380	-1.212153	7.0	53.846154	33.333333
483	-5.004302	0.906597	-4.0	41.666667	33.333333
485	2.012543	0.366985	21.0	57.142857	66.666667
487	2.910151	0.425559	7.0	61.538462	-33.333333
488	14.275080	0.459693	15.0	61.538462	-33.333333
489	-13.536521	0.097342	5.0	60.000000	-33.333333
490	4.510188	-0.570758	7.0	57.142857	0.000000
492	-13.615487	-0.737024	-1.0	46.666667	66.666667
493	10.775508	-0.057722	7.0	60.000000	0.000000
494	4.569589	-0.231439	25.0	58.823529	0.000000
496	-18.726971	-0.755737	-14.0	53.333333	-33.333333
497	-5.574733	0.886535	5.0	35.714286	66.666667
499	5.014283	1.972348	17.0	52.631579	0.000000
500	6.415078	0.254813	-18.0	40.000000	-33.333333
502	-1.343308	2.062729	-3.0	43.750000	-33.333333
503	-5.738591	-0.046456	16.0	62.500000	33.333333
505	6.941454	1.678318	28.0	73.333333	33.333333
506	5.622383	-1.324729	-16.0	60.000000	-66.666667
507	-0.677689	-0.313345	-11.0	66.666667	33.333333
509	-0.716536	1.824407	-33.0	42.857143	0.000000
510	-3.303823	-0.271935	-16.0	50.000000	0.000000
513	6.315981	-0.617777	-24.0	50.000000	0.000000
514	-2.200375	0.969143	5.0	50.000000	0.000000
515	-0.521025	1.039181	-23.0	38.888889	33.333333
516	-1.575550	-1.707931	-24.0	52.380952	0.000000