In [12]:
# %matplotlib inline: tells matplotlib to render figures inside the notebook
# pandas: data-analysis library built around DataFrames
# matplotlib: plotting library
# matplotlib.pyplot: the part of matplotlib used to define and draw a plot
%matplotlib inline
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
In [3]:
# pandas represents tabular data as a DataFrame.
# Read the score-board CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='ScoreBoardFinal.csv')
# Preview the first ten rows.
df.head(n=10)
Out[3]:
MId
Home_Team
Home_Team_Goals
Away_Team_Goals
Away_Team
Home_Poss
Away_Poss
Home_ShotsT
Away_ShotsT
Home_Shots
...
HTP
HTR
HTAR
HTMR
HTDR
ATP
ATR
ATAR
ATMR
ATDR
0
9601
CAR
1
2
CHE
36.8
63.2
3
7
10
...
14
775.406
151.256
269.512
354.638
14
883.673
166.000
315.499
402.174
1
9602
FUL
2
2
CRY
66.1
33.9
5
6
15
...
14
793.834
182.616
210.218
401.000
14
790.224
144.288
287.936
358.000
2
9603
HUL
0
2
EVE
43
57
3
4
12
...
14
792.189
216.189
219.000
357.000
14
862.968
173.566
293.402
396.000
3
9604
LIV
2
1
NUFC
66.2
33.8
5
2
13
...
14
884.043
269.651
220.392
394.000
14
827.956
84.175
294.260
449.521
4
9605
MCFC
2
0
WHU
68.1
31.9
7
0
28
...
14
912.543
155.313
350.230
407.000
14
808.489
129.325
315.164
364.000
5
9606
NOR
0
2
ARS
37.7
62.3
5
8
11
...
14
805.636
93.648
341.988
370.000
14
869.152
230.624
252.528
386.000
6
9607
SOT
1
1
MUFC
58.5
41.5
6
2
15
...
14
830.718
157.239
296.034
377.445
14
883.726
251.335
229.391
403.000
7
9608
SUN
1
3
SWAN
53.3
46.7
4
4
20
...
14
798.745
76.958
361.257
360.530
14
795.936
278.427
163.175
354.334
8
9609
TOT
3
0
AVL
54.7
45.3
6
1
12
...
14
841.663
148.000
291.936
401.727
13
800.032
181.180
183.000
435.852
9
9610
WBA
1
2
STK
43
57
4
4
18
...
14
810.201
166.403
349.500
294.298
14
793.845
85.944
349.901
358.000
10 rows × 33 columns
In [6]:
# Boolean-mask selection: keep every row whose 'Home_ShotsT' value is not the
# literal string 'XX-XX-' (presumably a missing-data marker - TODO confirm).
df_tmp = df[df['Home_ShotsT'].ne('XX-XX-')]
# Preview the first ten surviving rows.
df_tmp.head(n=10)
Out[6]:
MId
Home_Team
Home_Team_Goals
Away_Team_Goals
Away_Team
Home_Poss
Away_Poss
Home_ShotsT
Away_ShotsT
Home_Shots
...
HTP
HTR
HTAR
HTMR
HTDR
ATP
ATR
ATAR
ATMR
ATDR
0
9601
CAR
1
2
CHE
36.8
63.2
3
7
10
...
14
775.406
151.256
269.512
354.638
14
883.673
166.000
315.499
402.174
1
9602
FUL
2
2
CRY
66.1
33.9
5
6
15
...
14
793.834
182.616
210.218
401.000
14
790.224
144.288
287.936
358.000
2
9603
HUL
0
2
EVE
43
57
3
4
12
...
14
792.189
216.189
219.000
357.000
14
862.968
173.566
293.402
396.000
3
9604
LIV
2
1
NUFC
66.2
33.8
5
2
13
...
14
884.043
269.651
220.392
394.000
14
827.956
84.175
294.260
449.521
4
9605
MCFC
2
0
WHU
68.1
31.9
7
0
28
...
14
912.543
155.313
350.230
407.000
14
808.489
129.325
315.164
364.000
5
9606
NOR
0
2
ARS
37.7
62.3
5
8
11
...
14
805.636
93.648
341.988
370.000
14
869.152
230.624
252.528
386.000
6
9607
SOT
1
1
MUFC
58.5
41.5
6
2
15
...
14
830.718
157.239
296.034
377.445
14
883.726
251.335
229.391
403.000
7
9608
SUN
1
3
SWAN
53.3
46.7
4
4
20
...
14
798.745
76.958
361.257
360.530
14
795.936
278.427
163.175
354.334
8
9609
TOT
3
0
AVL
54.7
45.3
6
1
12
...
14
841.663
148.000
291.936
401.727
13
800.032
181.180
183.000
435.852
9
9610
WBA
1
2
STK
43
57
4
4
18
...
14
810.201
166.403
349.500
294.298
14
793.845
85.944
349.901
358.000
10 rows × 33 columns
In [23]:
def _to_numeric_if_possible(column):
    """Return `column` converted to a numeric dtype, or unchanged if it cannot be parsed.

    Replaces `pd.to_numeric(column, errors='ignore')`, which is deprecated in
    recent pandas releases, while keeping the same "leave it alone on failure"
    behaviour.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Keep every column from 'Home_Poss' to the last one (label-based slice).
# `.ix` no longer exists in pandas; `.loc` performs the same label slice.
# NOTE: `df` is rebound here (as in the original cell), so after this cell it
# no longer refers to the full frame loaded from the CSV.
df = df_tmp.loc[:, 'Home_Poss':]
df = df.apply(_to_numeric_if_possible)

# Correlate only numeric/boolean columns: newer pandas raises on string columns
# instead of silently dropping them.
correlations = df.select_dtypes(include=['number', 'bool']).corr()

# Take the labels from the matrix itself so they always match its rows/columns,
# even if some column could not be converted to numbers.
names = correlations.columns

# Display the correlation matrix.
correlations
Out[23]:
Home_Poss
Away_Poss
Home_ShotsT
Away_ShotsT
Home_Shots
Away_Shots
Home_Touches
Away_Touches
Home_Passes
Away_Passes
...
HTP
HTR
HTAR
HTMR
HTDR
ATP
ATR
ATAR
ATMR
ATDR
Home_Poss
1.000000
-1.000000
0.316120
-0.315443
0.524575
-0.475808
0.895654
-0.897972
0.877316
-0.884429
...
0.025225
0.464898
0.190234
0.042298
0.065889
0.005962
-0.473204
-0.240212
0.003324
-0.066883
Away_Poss
-1.000000
1.000000
-0.316120
0.315443
-0.524575
0.475808
-0.895654
0.897972
-0.877316
0.884429
...
-0.025225
-0.464898
-0.190234
-0.042298
-0.065889
-0.005962
0.473204
0.240212
-0.003324
0.066883
Home_ShotsT
0.316120
-0.316120
1.000000
-0.173285
0.665978
-0.250153
0.332443
-0.279791
0.315805
-0.266711
...
0.053084
0.330653
0.133486
0.005930
0.086834
0.070499
-0.219733
-0.112282
0.001539
-0.029892
Away_ShotsT
-0.315443
0.315443
-0.173285
1.000000
-0.229451
0.630199
-0.277174
0.326558
-0.284377
0.299902
...
0.027838
-0.195858
-0.073407
-0.008440
-0.052068
-0.010597
0.323539
0.106688
0.010279
0.115561
Home_Shots
0.524575
-0.524575
0.665978
-0.229451
1.000000
-0.354441
0.461211
-0.493154
0.419769
-0.493644
...
-0.014093
0.309978
0.139142
0.010991
0.052610
0.027267
-0.305942
-0.119271
-0.040202
-0.031136
Away_Shots
-0.475808
0.475808
-0.250153
0.630199
-0.354441
1.000000
-0.436438
0.438792
-0.443691
0.396951
...
0.006007
-0.274016
-0.086549
-0.057355
-0.025990
-0.031674
0.302355
0.131967
-0.009907
0.089040
Home_Touches
0.895654
-0.895654
0.332443
-0.277174
0.461211
-0.436438
1.000000
-0.646915
0.980318
-0.625277
...
0.038890
0.558134
0.213400
0.029445
0.133905
0.019861
-0.327717
-0.200768
0.018656
-0.018890
Away_Touches
-0.897972
0.897972
-0.279791
0.326558
-0.493154
0.438792
-0.646915
1.000000
-0.625229
0.977526
...
-0.004221
-0.320504
-0.153847
-0.040757
0.005563
0.003360
0.554541
0.247586
0.010373
0.108410
Home_Passes
0.877316
-0.877316
0.315805
-0.284377
0.419769
-0.443691
0.980318
-0.625229
1.000000
-0.577590
...
0.052318
0.561553
0.199638
0.044998
0.132922
0.020404
-0.323569
-0.204948
0.029555
-0.026147
Away_Passes
-0.884429
0.884429
-0.266711
0.299902
-0.493644
0.396951
-0.625277
0.977526
-0.577590
1.000000
...
0.001624
-0.295116
-0.156138
-0.025143
0.007073
0.007578
0.540404
0.227474
0.025252
0.102752
Home_Tackles
-0.172459
0.172459
-0.012261
0.070435
-0.085394
0.047551
-0.074807
0.246762
-0.134380
0.157641
...
0.013462
0.077214
0.033283
-0.007918
0.031578
-0.013654
0.200828
0.035920
0.054348
0.041557
Away_Tackles
0.169691
-0.169691
-0.019757
-0.001110
0.003944
-0.107036
0.232605
-0.079300
0.139685
-0.147408
...
-0.014019
0.164441
0.003048
0.047440
0.066700
0.011347
0.052493
-0.009029
0.029952
0.014225
Home_Clearances
-0.277403
0.277403
-0.095408
0.022782
-0.188163
0.262887
-0.269608
0.143354
-0.346759
0.095620
...
-0.045484
-0.197748
0.033539
-0.135742
-0.012801
-0.020916
-0.002795
0.084873
-0.095043
0.018308
Away_Clearances
0.214161
-0.214161
-0.010117
-0.027719
0.215589
-0.118271
0.073087
-0.200855
0.010092
-0.306799
...
-0.036738
-0.167104
0.043588
-0.081123
-0.084927
-0.008852
-0.107777
0.045625
-0.110708
0.008021
Home_Corners
0.441836
-0.441836
0.267085
-0.183954
0.495146
-0.317978
0.335527
-0.414290
0.295668
-0.436616
...
-0.027674
0.198027
0.057102
0.023322
0.054729
-0.027624
-0.228946
-0.071335
-0.037770
-0.038928
Away_Corners
-0.362764
0.362764
-0.163933
0.244439
-0.261911
0.498755
-0.354564
0.267462
-0.374861
0.228546
...
0.015213
-0.132627
-0.026517
-0.062478
0.018675
-0.046770
0.210584
0.087795
0.029492
0.009556
Home_Offsides
-0.017331
0.017331
0.026309
-0.024810
-0.018583
-0.049601
-0.004910
0.002334
-0.020296
0.006835
...
0.058818
0.044848
0.033794
-0.006883
0.000802
-0.007569
0.059502
-0.037806
0.022949
0.077124
Away_Offsides
-0.016569
0.016569
0.011807
0.038690
-0.009970
-0.032591
-0.027856
0.016967
-0.027339
-0.002533
...
-0.006855
-0.016913
-0.005802
-0.032585
0.043951
0.009871
0.051386
0.031946
0.001091
-0.004269
HTP
0.025225
-0.025225
0.053084
0.027838
-0.014093
0.006007
0.038890
-0.004221
0.052318
0.001624
...
1.000000
0.056394
-0.062121
0.083333
0.011258
0.033470
-0.054241
-0.053437
0.013525
0.011634
HTR
0.464898
-0.464898
0.330653
-0.195858
0.309978
-0.274016
0.558134
-0.320504
0.561553
-0.295116
...
0.056394
1.000000
0.204030
0.143205
0.359908
0.001965
-0.014041
-0.116419
0.083728
0.033848
HTAR
0.190234
-0.190234
0.133486
-0.073407
0.139142
-0.086549
0.213400
-0.153847
0.199638
-0.156138
...
-0.062121
0.204030
1.000000
-0.748693
-0.120574
0.042061
-0.054006
-0.033736
0.024187
-0.036268
HTMR
0.042298
-0.042298
0.005930
-0.008440
0.010991
-0.057355
0.029445
-0.040757
0.044998
-0.025143
...
0.083333
0.143205
-0.748693
1.000000
-0.328857
-0.031348
0.035144
-0.006556
0.008865
0.028434
HTDR
0.065889
-0.065889
0.086834
-0.052068
0.052610
-0.025990
0.133905
0.005563
0.132922
0.007073
...
0.011258
0.359908
-0.120574
-0.328857
1.000000
-0.011105
0.012031
-0.042989
0.024615
0.038627
ATP
0.005962
-0.005962
0.070499
-0.010597
0.027267
-0.031674
0.019861
0.003360
0.020404
0.007578
...
0.033470
0.001965
0.042061
-0.031348
-0.011105
1.000000
0.026136
-0.046073
0.074177
-0.023717
ATR
-0.473204
0.473204
-0.219733
0.323539
-0.305942
0.302355
-0.327717
0.554541
-0.323569
0.540404
...
-0.054241
-0.014041
-0.054006
0.035144
0.012031
0.026136
1.000000
0.205996
0.166793
0.332486
ATAR
-0.240212
0.240212
-0.112282
0.106688
-0.119271
0.131967
-0.200768
0.247586
-0.204948
0.227474
...
-0.053437
-0.116419
-0.033736
-0.006556
-0.042989
-0.046073
0.205996
1.000000
-0.755884
-0.152354
ATMR
0.003324
-0.003324
0.001539
0.010279
-0.040202
-0.009907
0.018656
0.010373
0.029555
0.025252
...
0.013525
0.083728
0.024187
0.008865
0.024615
0.074177
0.166793
-0.755884
1.000000
-0.280309
ATDR
-0.066883
0.066883
-0.029892
0.115561
-0.031136
0.089040
-0.018890
0.108410
-0.026147
0.102752
...
0.011634
0.033848
-0.036268
0.028434
0.038627
-0.023717
0.332486
-0.152354
-0.280309
1.000000
28 rows × 28 columns
In [26]:
# Draw the correlation matrix as a colour-coded grid with one labelled tick
# per column.
fig, ax = plt.subplots(figsize=(35, 35))

# Pin the colour scale to the full range a correlation coefficient can take.
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)

# Derive tick positions and labels from the matrix being plotted instead of a
# hard-coded count, so the axes stay correct if the number of columns changes.
ticks = np.arange(len(correlations.columns))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
# Rotate the column labels so neighbouring names do not overlap.
ax.set_xticklabels(correlations.columns, rotation=90)
ax.set_yticklabels(correlations.index)
plt.show()
Content source: sourabhswain/Prolego
Similar notebooks: