In [12]:
# %matplotlib inline: tells matplotlib to render figures inside the notebook
# pandas: a data-analysis library built around DataFrames
# matplotlib: the data-plotting library
# matplotlib.pyplot: the plotting interface of matplotlib, used to define a plot
%matplotlib inline 
import pandas as pd 
import matplotlib 
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# pandas represents tabular data as a DataFrame; read_csv builds one from
# the scoreboard CSV file.
df = pd.read_csv(filepath_or_buffer='ScoreBoardFinal.csv')

# Show the first ten rows as the cell output.
df.head(n=10)


Out[3]:
MId Home_Team Home_Team_Goals Away_Team_Goals Away_Team Home_Poss Away_Poss Home_ShotsT Away_ShotsT Home_Shots ... HTP HTR HTAR HTMR HTDR ATP ATR ATAR ATMR ATDR
0 9601 CAR 1 2 CHE 36.8 63.2 3 7 10 ... 14 775.406 151.256 269.512 354.638 14 883.673 166.000 315.499 402.174
1 9602 FUL 2 2 CRY 66.1 33.9 5 6 15 ... 14 793.834 182.616 210.218 401.000 14 790.224 144.288 287.936 358.000
2 9603 HUL 0 2 EVE 43 57 3 4 12 ... 14 792.189 216.189 219.000 357.000 14 862.968 173.566 293.402 396.000
3 9604 LIV 2 1 NUFC 66.2 33.8 5 2 13 ... 14 884.043 269.651 220.392 394.000 14 827.956 84.175 294.260 449.521
4 9605 MCFC 2 0 WHU 68.1 31.9 7 0 28 ... 14 912.543 155.313 350.230 407.000 14 808.489 129.325 315.164 364.000
5 9606 NOR 0 2 ARS 37.7 62.3 5 8 11 ... 14 805.636 93.648 341.988 370.000 14 869.152 230.624 252.528 386.000
6 9607 SOT 1 1 MUFC 58.5 41.5 6 2 15 ... 14 830.718 157.239 296.034 377.445 14 883.726 251.335 229.391 403.000
7 9608 SUN 1 3 SWAN 53.3 46.7 4 4 20 ... 14 798.745 76.958 361.257 360.530 14 795.936 278.427 163.175 354.334
8 9609 TOT 3 0 AVL 54.7 45.3 6 1 12 ... 14 841.663 148.000 291.936 401.727 13 800.032 181.180 183.000 435.852
9 9610 WBA 1 2 STK 43 57 4 4 18 ... 14 810.201 166.403 349.500 294.298 14 793.845 85.944 349.901 358.000

10 rows × 33 columns


In [6]:
# Boolean-mask selection: keep only the rows whose 'Home_ShotsT' value is
# not the string 'XX-XX-'.
# NOTE(review): 'XX-XX-' is presumably a placeholder for missing match
# stats -- confirm against the data source.
df_tmp = df[df['Home_ShotsT'].ne('XX-XX-')]

# Show the first ten remaining rows as the cell output.
df_tmp.head(n=10)


Out[6]:
MId Home_Team Home_Team_Goals Away_Team_Goals Away_Team Home_Poss Away_Poss Home_ShotsT Away_ShotsT Home_Shots ... HTP HTR HTAR HTMR HTDR ATP ATR ATAR ATMR ATDR
0 9601 CAR 1 2 CHE 36.8 63.2 3 7 10 ... 14 775.406 151.256 269.512 354.638 14 883.673 166.000 315.499 402.174
1 9602 FUL 2 2 CRY 66.1 33.9 5 6 15 ... 14 793.834 182.616 210.218 401.000 14 790.224 144.288 287.936 358.000
2 9603 HUL 0 2 EVE 43 57 3 4 12 ... 14 792.189 216.189 219.000 357.000 14 862.968 173.566 293.402 396.000
3 9604 LIV 2 1 NUFC 66.2 33.8 5 2 13 ... 14 884.043 269.651 220.392 394.000 14 827.956 84.175 294.260 449.521
4 9605 MCFC 2 0 WHU 68.1 31.9 7 0 28 ... 14 912.543 155.313 350.230 407.000 14 808.489 129.325 315.164 364.000
5 9606 NOR 0 2 ARS 37.7 62.3 5 8 11 ... 14 805.636 93.648 341.988 370.000 14 869.152 230.624 252.528 386.000
6 9607 SOT 1 1 MUFC 58.5 41.5 6 2 15 ... 14 830.718 157.239 296.034 377.445 14 883.726 251.335 229.391 403.000
7 9608 SUN 1 3 SWAN 53.3 46.7 4 4 20 ... 14 798.745 76.958 361.257 360.530 14 795.936 278.427 163.175 354.334
8 9609 TOT 3 0 AVL 54.7 45.3 6 1 12 ... 14 841.663 148.000 291.936 401.727 13 800.032 181.180 183.000 435.852
9 9610 WBA 1 2 STK 43 57 4 4 18 ... 14 810.201 166.403 349.500 294.298 14 793.845 85.944 349.901 358.000

10 rows × 33 columns


In [23]:
# Keep the columns from 'Home_Poss' through the last column.
# `.ix` was removed in pandas 1.0; label-based slicing with `.loc` is the
# direct replacement for this call.
# NOTE: this rebinds `df` to the sliced frame, so the full table loaded
# earlier is no longer reachable through `df` after this cell runs.
df = df_tmp.loc[:, 'Home_Poss':]


def _to_numeric_or_keep(column):
    """Convert `column` to a numeric dtype, or return it unchanged.

    Mirrors the former `pd.to_numeric(column, errors='ignore')` behaviour
    (deprecated in pandas 2.2): a column containing a value that cannot be
    parsed as a number is handed back as-is instead of raising.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


df = df.apply(_to_numeric_or_keep)

# Pairwise correlation of the numeric columns only. Selecting them
# explicitly keeps `corr()` working on pandas >= 2.0, where non-numeric
# columns are no longer dropped silently.
correlations = df.select_dtypes(include='number').corr()

# Take the labels from the matrix itself so they always line up with its
# rows and columns, even if some column could not be converted.
names = correlations.columns

# Display the correlation table as the cell output.
correlations


Out[23]:
Home_Poss Away_Poss Home_ShotsT Away_ShotsT Home_Shots Away_Shots Home_Touches Away_Touches Home_Passes Away_Passes ... HTP HTR HTAR HTMR HTDR ATP ATR ATAR ATMR ATDR
Home_Poss 1.000000 -1.000000 0.316120 -0.315443 0.524575 -0.475808 0.895654 -0.897972 0.877316 -0.884429 ... 0.025225 0.464898 0.190234 0.042298 0.065889 0.005962 -0.473204 -0.240212 0.003324 -0.066883
Away_Poss -1.000000 1.000000 -0.316120 0.315443 -0.524575 0.475808 -0.895654 0.897972 -0.877316 0.884429 ... -0.025225 -0.464898 -0.190234 -0.042298 -0.065889 -0.005962 0.473204 0.240212 -0.003324 0.066883
Home_ShotsT 0.316120 -0.316120 1.000000 -0.173285 0.665978 -0.250153 0.332443 -0.279791 0.315805 -0.266711 ... 0.053084 0.330653 0.133486 0.005930 0.086834 0.070499 -0.219733 -0.112282 0.001539 -0.029892
Away_ShotsT -0.315443 0.315443 -0.173285 1.000000 -0.229451 0.630199 -0.277174 0.326558 -0.284377 0.299902 ... 0.027838 -0.195858 -0.073407 -0.008440 -0.052068 -0.010597 0.323539 0.106688 0.010279 0.115561
Home_Shots 0.524575 -0.524575 0.665978 -0.229451 1.000000 -0.354441 0.461211 -0.493154 0.419769 -0.493644 ... -0.014093 0.309978 0.139142 0.010991 0.052610 0.027267 -0.305942 -0.119271 -0.040202 -0.031136
Away_Shots -0.475808 0.475808 -0.250153 0.630199 -0.354441 1.000000 -0.436438 0.438792 -0.443691 0.396951 ... 0.006007 -0.274016 -0.086549 -0.057355 -0.025990 -0.031674 0.302355 0.131967 -0.009907 0.089040
Home_Touches 0.895654 -0.895654 0.332443 -0.277174 0.461211 -0.436438 1.000000 -0.646915 0.980318 -0.625277 ... 0.038890 0.558134 0.213400 0.029445 0.133905 0.019861 -0.327717 -0.200768 0.018656 -0.018890
Away_Touches -0.897972 0.897972 -0.279791 0.326558 -0.493154 0.438792 -0.646915 1.000000 -0.625229 0.977526 ... -0.004221 -0.320504 -0.153847 -0.040757 0.005563 0.003360 0.554541 0.247586 0.010373 0.108410
Home_Passes 0.877316 -0.877316 0.315805 -0.284377 0.419769 -0.443691 0.980318 -0.625229 1.000000 -0.577590 ... 0.052318 0.561553 0.199638 0.044998 0.132922 0.020404 -0.323569 -0.204948 0.029555 -0.026147
Away_Passes -0.884429 0.884429 -0.266711 0.299902 -0.493644 0.396951 -0.625277 0.977526 -0.577590 1.000000 ... 0.001624 -0.295116 -0.156138 -0.025143 0.007073 0.007578 0.540404 0.227474 0.025252 0.102752
Home_Tackles -0.172459 0.172459 -0.012261 0.070435 -0.085394 0.047551 -0.074807 0.246762 -0.134380 0.157641 ... 0.013462 0.077214 0.033283 -0.007918 0.031578 -0.013654 0.200828 0.035920 0.054348 0.041557
Away_Tackles 0.169691 -0.169691 -0.019757 -0.001110 0.003944 -0.107036 0.232605 -0.079300 0.139685 -0.147408 ... -0.014019 0.164441 0.003048 0.047440 0.066700 0.011347 0.052493 -0.009029 0.029952 0.014225
Home_Clearances -0.277403 0.277403 -0.095408 0.022782 -0.188163 0.262887 -0.269608 0.143354 -0.346759 0.095620 ... -0.045484 -0.197748 0.033539 -0.135742 -0.012801 -0.020916 -0.002795 0.084873 -0.095043 0.018308
Away_Clearances 0.214161 -0.214161 -0.010117 -0.027719 0.215589 -0.118271 0.073087 -0.200855 0.010092 -0.306799 ... -0.036738 -0.167104 0.043588 -0.081123 -0.084927 -0.008852 -0.107777 0.045625 -0.110708 0.008021
Home_Corners 0.441836 -0.441836 0.267085 -0.183954 0.495146 -0.317978 0.335527 -0.414290 0.295668 -0.436616 ... -0.027674 0.198027 0.057102 0.023322 0.054729 -0.027624 -0.228946 -0.071335 -0.037770 -0.038928
Away_Corners -0.362764 0.362764 -0.163933 0.244439 -0.261911 0.498755 -0.354564 0.267462 -0.374861 0.228546 ... 0.015213 -0.132627 -0.026517 -0.062478 0.018675 -0.046770 0.210584 0.087795 0.029492 0.009556
Home_Offsides -0.017331 0.017331 0.026309 -0.024810 -0.018583 -0.049601 -0.004910 0.002334 -0.020296 0.006835 ... 0.058818 0.044848 0.033794 -0.006883 0.000802 -0.007569 0.059502 -0.037806 0.022949 0.077124
Away_Offsides -0.016569 0.016569 0.011807 0.038690 -0.009970 -0.032591 -0.027856 0.016967 -0.027339 -0.002533 ... -0.006855 -0.016913 -0.005802 -0.032585 0.043951 0.009871 0.051386 0.031946 0.001091 -0.004269
HTP 0.025225 -0.025225 0.053084 0.027838 -0.014093 0.006007 0.038890 -0.004221 0.052318 0.001624 ... 1.000000 0.056394 -0.062121 0.083333 0.011258 0.033470 -0.054241 -0.053437 0.013525 0.011634
HTR 0.464898 -0.464898 0.330653 -0.195858 0.309978 -0.274016 0.558134 -0.320504 0.561553 -0.295116 ... 0.056394 1.000000 0.204030 0.143205 0.359908 0.001965 -0.014041 -0.116419 0.083728 0.033848
HTAR 0.190234 -0.190234 0.133486 -0.073407 0.139142 -0.086549 0.213400 -0.153847 0.199638 -0.156138 ... -0.062121 0.204030 1.000000 -0.748693 -0.120574 0.042061 -0.054006 -0.033736 0.024187 -0.036268
HTMR 0.042298 -0.042298 0.005930 -0.008440 0.010991 -0.057355 0.029445 -0.040757 0.044998 -0.025143 ... 0.083333 0.143205 -0.748693 1.000000 -0.328857 -0.031348 0.035144 -0.006556 0.008865 0.028434
HTDR 0.065889 -0.065889 0.086834 -0.052068 0.052610 -0.025990 0.133905 0.005563 0.132922 0.007073 ... 0.011258 0.359908 -0.120574 -0.328857 1.000000 -0.011105 0.012031 -0.042989 0.024615 0.038627
ATP 0.005962 -0.005962 0.070499 -0.010597 0.027267 -0.031674 0.019861 0.003360 0.020404 0.007578 ... 0.033470 0.001965 0.042061 -0.031348 -0.011105 1.000000 0.026136 -0.046073 0.074177 -0.023717
ATR -0.473204 0.473204 -0.219733 0.323539 -0.305942 0.302355 -0.327717 0.554541 -0.323569 0.540404 ... -0.054241 -0.014041 -0.054006 0.035144 0.012031 0.026136 1.000000 0.205996 0.166793 0.332486
ATAR -0.240212 0.240212 -0.112282 0.106688 -0.119271 0.131967 -0.200768 0.247586 -0.204948 0.227474 ... -0.053437 -0.116419 -0.033736 -0.006556 -0.042989 -0.046073 0.205996 1.000000 -0.755884 -0.152354
ATMR 0.003324 -0.003324 0.001539 0.010279 -0.040202 -0.009907 0.018656 0.010373 0.029555 0.025252 ... 0.013525 0.083728 0.024187 0.008865 0.024615 0.074177 0.166793 -0.755884 1.000000 -0.280309
ATDR -0.066883 0.066883 -0.029892 0.115561 -0.031136 0.089040 -0.018890 0.108410 -0.026147 0.102752 ... 0.011634 0.033848 -0.036268 0.028434 0.038627 -0.023717 0.332486 -0.152354 -0.280309 1.000000

28 rows × 28 columns


In [26]:
# Draw the correlation matrix as a colour-coded grid.
fig, ax = plt.subplots(figsize=(35, 35))

# Fix the colour scale to [-1, 1], the full range of a correlation value.
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)

# Derive ticks and labels from the matrix being plotted instead of a
# hard-coded count of 28, so the plot stays correct if the column set changes.
labels = list(correlations.columns)
ticks = np.arange(len(labels))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
# Rotate the x labels so long column names do not overlap.
ax.set_xticklabels(labels, rotation=90)
ax.set_yticklabels(labels)
plt.show()