In [26]:
# Import libraries
# SQL
import sqlite3
# Data Manipulation
import numpy as np
import pandas as pd
# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno  # visualize NaNs
# Nice tables
from ipy_table import *
# Import and suppress warnings
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
%matplotlib inline
In [29]:
# Load data (make sure you have downloaded database.sqlite)
with sqlite3.connect(r'C:/Python/Soccer1/database.sqlite') as con:
    country_df = pd.read_sql_query("SELECT * FROM Country", con)
    matches_df = pd.read_sql_query("SELECT * FROM Match", con)
    league_df = pd.read_sql_query("SELECT * FROM League", con)
    team_df = pd.read_sql_query("SELECT * FROM Team", con)
    player_df = pd.read_sql_query("SELECT * FROM Player", con)
    player_stats_df = pd.read_sql_query("SELECT * FROM Player_Attributes", con)
# Load past results
super_table = [['Name', 'Regression function', 'Train Acc', 'Validation Acc',
                'r2_score', 'conf_matrix', 'Cross-Valid.']]
df_load = pd.read_csv(r'C:/Python/Soccer/out/results_table.csv')
for row in df_load.values.tolist():
    super_table.append(row)
In [3]:
player_vector = ['home_player_1', 'home_player_2',
'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
'away_player_8', 'away_player_9', 'away_player_10',
'away_player_11']
# Attach each player's overall rating to the match rows.
# Player_Attributes has many dated rows per player, so collapse them to one
# value per player first (a plain merge would mis-align the row order).
rating_by_player = player_stats_df.groupby('player_api_id')['overall_rating'].mean()
for player in player_vector:
    matches_df[player + '_overall_rating'] = matches_df[player].map(rating_by_player)
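The averaged rating above ignores when each attribute snapshot was taken. A time-aware lookup (a sketch only, assuming the usual Kaggle European Soccer schema with id and date columns on Match) would instead pick the snapshot in force on the match date via pd.merge_asof, shown here for a single player column:
In [ ]:
# Sketch: latest attribute snapshot on or before each match date
ratings = player_stats_df[['player_api_id', 'date', 'overall_rating']].copy()
ratings['date'] = pd.to_datetime(ratings['date'])
left = matches_df[['id', 'date', 'home_player_1']].dropna(subset=['home_player_1']).copy()
left['date'] = pd.to_datetime(left['date'])
left['home_player_1'] = left['home_player_1'].astype('int64')  # merge_asof needs matching 'by' dtypes
asof = pd.merge_asof(left.sort_values('date'), ratings.sort_values('date'),
                     on='date', left_by='home_player_1', right_by='player_api_id',
                     direction='backward')  # 'backward' = most recent snapshot not after the match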
In [4]:
all_df = [country_df, matches_df, league_df, team_df, player_df, player_stats_df]
# Tag each frame with a display name for the info loop below
# (pandas warns about setting non-column attributes; warnings are silenced above)
country_df.dfname = 'country_df'
matches_df.dfname = 'matches_df'
league_df.dfname = 'league_df'
team_df.dfname = 'team_df'
player_df.dfname = 'player_df'
player_stats_df.dfname = 'player_stats_df'
In [6]:
for df in all_df:
    print(df.dfname)
    df.info()  # info() prints directly and returns None
    print('-' * 40)
In [5]:
msno.matrix(matches_df)
In [6]:
matches_df.dropna(thresh=80, inplace=True)  # keep rows with at least 80 non-null values
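For reference, thresh counts non-null values per row; a toy sketch of the same call:
In [ ]:
# Row 1 has only one non-null value, so thresh=2 drops it while row 0 survives
toy = pd.DataFrame({'a': [1, np.nan], 'b': [2, np.nan], 'c': [3, 4]})
print(toy.dropna(thresh=2))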
In [7]:
msno.matrix(matches_df)
In [9]:
msno.dendrogram(matches_df)
In [11]:
matches_df.hist(figsize = (100, 100))
plt.show()
In [5]:
bookies = ['B365', 'BW', 'IW', 'LB', 'PS', 'WH', 'SJ', 'VC', 'GB', 'BS']
bookies_H = [bookie+'H' for bookie in bookies]
bookies_A = [bookie+'A' for bookie in bookies]
bookies_D = [bookie+'D' for bookie in bookies]
bookies_types = {'Home odds':bookies_H, 'Draw odds':bookies_D, 'Away odds':bookies_A}
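These decimal odds encode implied probabilities (p = 1/odds). Summing a match's three implied probabilities gives the bookmaker's overround, i.e. its built-in margin, which is why the sum exceeds 1. A quick check for Bet365 (a sketch using the columns loaded above):
In [ ]:
# Implied probabilities sum to more than 1; the excess is the bookie's margin
implied = 1.0 / matches_df[['B365H', 'B365D', 'B365A']].dropna()
overround = implied.sum(axis=1)
print(overround.describe())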
In [13]:
for home, draw, away in zip(bookies_H, bookies_D, bookies_A):
    fig, ax = plt.subplots()
    for odds in [home, draw, away]:
        sns.distplot(matches_df[odds].dropna(), ax=ax, label=odds, hist=False)
    # Title = bookie name (strip the H/D/A suffix)
    plt.title(home[:-1], fontsize=16)
    # Remove x label and clip the long right tail
    ax.set_xlabel('')
    ax.set_xlim([0, 8])
    plt.show()
In [14]:
plt.rc('legend', fontsize=30)
# _______ All bookmakers - Home/Draw/Away odds | KDE + boxplots
bookies_types = {'Home odds': bookies_H, 'Draw odds': bookies_D, 'Away odds': bookies_A}
for bookie_type, bookie_list in bookies_types.items():
    fig, axes = plt.subplots(ncols=2, figsize=(30, 15))
    axes[0].set_xlim([0, 8])
    if bookie_type == 'Home odds':
        axes[0].set_ylim([0, 0.65])
    elif bookie_type == 'Draw odds':
        axes[0].set_ylim([0, 2.3])
    else:
        axes[0].set_ylim([0, 0.35])
    for bookie in bookie_list:
        sns.distplot(matches_df[bookie].dropna(), ax=axes[0], label=bookie, hist=False)
    # Remove x label and place the legend
    axes[0].set_xlabel('')
    axes[0].legend(loc='best')
    bookie_sel_df = matches_df[bookie_list]
    # ax=axes[1] pins the boxplots to the second panel
    sns.boxplot(data=bookie_sel_df, palette='Set2', showmeans=True, ax=axes[1])
    if bookie_type == 'Home odds':
        axes[1].set_ylim([1, 5])
    elif bookie_type == 'Draw odds':
        axes[1].set_ylim([1, 10])
    else:
        axes[1].set_ylim([1, 5.5])
    plt.suptitle(str(bookie_type), fontsize=80)
    plt.show()
In [15]:
# How many goals - Home/Away
goals_df = matches_df[['home_team_goal', 'away_team_goal']]
color = ['red', 'lime']
fig, ax = plt.subplots()
ax.set_xlim([0, 10])
ax.set_ylim([0, 9500])
sns.distplot(goals_df.dropna(), ax=ax, kde=False, color=color)
plt.show()
# Contingency table of home vs. away goals
goals_home_vs_away = pd.crosstab(index=matches_df['home_team_goal'],
                                 columns=matches_df['away_team_goal'])
print(goals_home_vs_away)
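A natural follow-up is a chi-square test of independence on this contingency table (a sketch; assumes scipy is available, and note the sparse high-score cells weaken the approximation):
In [ ]:
from scipy.stats import chi2_contingency
chi2_stat, p_value, dof, expected = chi2_contingency(goals_home_vs_away)
print('chi2 = {:.1f}, p = {:.3g}, dof = {}'.format(chi2_stat, p_value, dof))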
In [6]:
# Preprocessing libraries (note: Imputer was removed in scikit-learn 0.22+,
# where SimpleImputer replaces it)
from sklearn.preprocessing import LabelEncoder, StandardScaler, Imputer, OneHotEncoder
In [18]:
def impute_numeric(dataset, formula):
    """
    Impute numeric values in a dataset using linear regression.
    dataset: pandas DataFrame
    formula: patsy-style string, e.g. 'Y ~ X1 + X2'
    """
    import statsmodels.formula.api as smf
    import pandas as pd
    # Fit OLS on the complete rows (statsmodels drops NaN rows by default)
    lm = smf.ols(formula=formula, data=dataset)
    res = lm.fit()
    # Predict the target for rows that contain NaNs
    target_col = formula.split(None, 1)[0]
    temp_train = dataset[pd.isnull(dataset).any(axis=1)].copy()
    temp_train = temp_train.drop(target_col, axis=1).copy()
    var_pred = res.predict(temp_train).round(decimals=0)
    # fillna aligns on index, so only the originally missing cells are filled
    dataset[target_col].fillna(var_pred, inplace=True)
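A hypothetical call (the formula is illustrative; any mostly-complete odds columns would do):
In [ ]:
# Fill gaps in SJH by regressing it on two well-covered bookmakers
impute_numeric(matches_df, 'SJH ~ B365H + BWH')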
In [7]:
def label_win(row):
    if row['home_team_goal'] > row['away_team_goal']:
        return 'WIN'
    if row['home_team_goal'] == row['away_team_goal']:
        return 'DRAW'
    return 'LOSE'
matches_df['RESULT'] = matches_df.apply(label_win, axis=1)
In [8]:
ml_matches_df = matches_df[[ 'country_id', 'league_id', 'season', 'stage', 'date',
'home_team_api_id', 'away_team_api_id', 'home_player_X1',
'home_player_X2', 'home_player_X3', 'home_player_X4',
'home_player_X5', 'home_player_X6', 'home_player_X7',
'home_player_X8', 'home_player_X9', 'home_player_X10',
'home_player_X11', 'away_player_X1', 'away_player_X2',
'away_player_X3', 'away_player_X4', 'away_player_X5',
'away_player_X6', 'away_player_X7', 'away_player_X8',
'away_player_X9', 'away_player_X10', 'away_player_X11',
'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
'away_player_Y11', 'home_player_1', 'home_player_2',
'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
'away_player_8', 'away_player_9', 'away_player_10',
'away_player_11', 'B365H', 'B365D', 'B365A', 'BWH',
'BWD', 'BWA', 'IWH', 'IWD', 'IWA', 'LBH', 'LBD', 'LBA', 'PSH',
'PSD', 'PSA', 'WHH', 'WHD', 'WHA', 'SJH', 'SJD', 'SJA', 'VCH',
'VCD', 'VCA', 'GBH', 'GBD', 'GBA', 'BSH', 'BSD', 'BSA', 'home_player_1_overall_rating', 'home_player_2_overall_rating',
'home_player_3_overall_rating', 'home_player_4_overall_rating',
'home_player_5_overall_rating', 'home_player_6_overall_rating',
'home_player_7_overall_rating', 'home_player_8_overall_rating',
'home_player_9_overall_rating', 'home_player_10_overall_rating',
'home_player_11_overall_rating', 'away_player_1_overall_rating',
'away_player_2_overall_rating', 'away_player_3_overall_rating',
'away_player_4_overall_rating', 'away_player_5_overall_rating',
'away_player_6_overall_rating', 'away_player_7_overall_rating',
'away_player_8_overall_rating', 'away_player_9_overall_rating',
'away_player_10_overall_rating', 'away_player_11_overall_rating', 'RESULT']].copy()  # copy so later in-place ops don't hit a view of matches_df
In [9]:
# Drop rows with NaNs in these columns (each has fewer than 300 missing records, so little data is lost)
ml_matches_df.dropna(subset = ['B365H', 'BWH', 'IWH', 'WHH', 'VCH', 'LBH', 'home_team_api_id', 'away_team_api_id',
'home_player_X1', 'home_player_X2', 'home_player_X3', 'home_player_X4',
'home_player_X5', 'home_player_X6', 'home_player_X7', 'home_player_X8',
'home_player_X9', 'home_player_X10', 'home_player_X11', 'away_player_X1',
'away_player_X2', 'away_player_X3', 'away_player_X4', 'away_player_X5',
'away_player_X6', 'away_player_X7', 'away_player_X8', 'away_player_X9',
'away_player_X10', 'away_player_X11', 'home_player_Y1', 'home_player_Y2',
'home_player_Y3', 'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
'home_player_Y7', 'home_player_Y8', 'home_player_Y9', 'home_player_Y10',
'home_player_Y11', 'away_player_Y1', 'away_player_Y2', 'away_player_Y3',
'away_player_Y4', 'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
'away_player_Y8', 'away_player_Y9', 'away_player_Y10', 'away_player_Y11',
'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
'away_player_8', 'away_player_9', 'away_player_10','away_player_11',
'home_player_1', 'home_player_2', 'home_player_1_overall_rating', 'home_player_2_overall_rating',
'home_player_3_overall_rating', 'home_player_4_overall_rating',
'home_player_5_overall_rating', 'home_player_6_overall_rating',
'home_player_7_overall_rating', 'home_player_8_overall_rating',
'home_player_9_overall_rating', 'home_player_10_overall_rating',
'home_player_11_overall_rating', 'away_player_1_overall_rating',
'away_player_2_overall_rating', 'away_player_3_overall_rating',
'away_player_4_overall_rating', 'away_player_5_overall_rating',
'away_player_6_overall_rating', 'away_player_7_overall_rating',
'away_player_8_overall_rating', 'away_player_9_overall_rating',
'away_player_10_overall_rating', 'away_player_11_overall_rating'
], how='any', inplace=True)
In [13]:
ml_matches_df.info()
In [10]:
# Impute odds for less-covered bookies with the row-wise mean of the others
bookies_types = {'Home odds': bookies_H, 'Draw odds': bookies_D, 'Away odds': bookies_A}
for bookie_type, bookie_list in bookies_types.items():
    bookie_sel_df = ml_matches_df[bookie_list]
    # axis=1: fill each match's missing odds with the mean across bookmakers
    bookie_sel_df = bookie_sel_df.apply(lambda x: x.fillna(x.mean()), axis=1)
    ml_matches_df[bookie_list] = bookie_sel_df.copy()
In [11]:
msno.matrix(ml_matches_df)
In [13]:
matches_data = ml_matches_df.drop('RESULT', axis=1)
matches_target = ml_matches_df[['RESULT']]
In [14]:
ml_matches_df.shape
Out[14]:
In [15]:
matches_data.shape
Out[15]:
In [16]:
matches_target.shape
Out[16]:
In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(matches_data, np.ravel(matches_target), test_size = 0.20, random_state = 101)
In [31]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
clf = RandomForestClassifier(n_estimators=50, max_features='sqrt')
clf = clf.fit(X_train, y_train)
Feature importances from the random forest
In [32]:
features_tree = pd.DataFrame()
features_tree['feature'] = matches_data.columns
features_tree['importance'] = clf.feature_importances_
features_tree.sort_values(by=['importance'], ascending=True, inplace=True)
features_tree.set_index('feature', inplace=True)
Feature importances from the chi-square test
In [18]:
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest
features_chi = pd.DataFrame()
features_chi['feature'] = matches_data.columns
# Keep the selector defined: cell In [22] below calls model.transform().
# Note: the label-encoding cell (In [12]) must have run first, since chi2
# cannot score string-typed columns.
model = SelectKBest(chi2, k=125)
fit = model.fit(X_train, y_train)
features_chi2 = chi2(X_train, y_train)
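chi2 also assumes non-negative feature values. A more defensive variant (a sketch only) scales features to [0, 1] before scoring:
In [ ]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
# Scale to [0, 1] so chi2's non-negativity assumption holds, then keep the k best
selector = Pipeline([('scale', MinMaxScaler()),
                     ('kbest', SelectKBest(chi2, k=125))])
X_train_k = selector.fit_transform(X_train, y_train)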
In [19]:
features_chi['importance'] = features_chi2[0]  # chi2 returns (scores, p-values); keep the scores
In [20]:
features_chi.sort_values(by=['importance'], ascending=True, inplace=True)
features_chi.set_index('feature', inplace=True)
In [25]:
features_chi.plot(kind='barh', figsize=(20, 20))
Out[25]:
In [34]:
features_tree.plot(kind='barh', figsize=(20, 20))
Out[34]:
Reducing the dataset to only the relevant features
In [22]:
X_train_reduced = model.transform(X_train)
In [82]:
X_train_reduced.shape
Out[82]:
In [83]:
X_test_reduced = model.transform(X_test)
In [84]:
X_test_reduced.shape
Out[84]:
In [85]:
from sklearn.linear_model import LogisticRegression
In [86]:
param_grid = {'C': [0.1, 1, 10, 100, 1000]}
In [87]:
from sklearn.model_selection import GridSearchCV
In [88]:
grid = GridSearchCV(LogisticRegression(), param_grid, refit=True, verbose = 3)
In [89]:
grid.fit(X_train_reduced, y_train)
Out[89]:
In [90]:
grid.best_params_
Out[90]:
In [91]:
grid.best_estimator_
Out[91]:
In [92]:
grid_predictions = grid.predict(X_test_reduced)
In [95]:
from sklearn.metrics import classification_report, confusion_matrix
In [96]:
print(confusion_matrix(y_test, grid_predictions))
In [97]:
print(classification_report(y_test, grid_predictions))
In [12]:
# Label-encode categorical (object-typed) columns
le = LabelEncoder()
for col in ml_matches_df.columns.values:
    # Encode only categorical variables
    if ml_matches_df[col].dtypes == 'object':
        # Fit on the full column so every level is covered
        le.fit(ml_matches_df[col].values)
        ml_matches_df[col] = le.transform(ml_matches_df[col])
ml_matches_df['RESULT'] = ml_matches_df['RESULT'].astype(float)
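LabelEncoder imposes an arbitrary numeric order on nominal values; where that matters (season, team or league ids), one-hot encoding is the usual alternative. A minimal sketch on the raw column, before label encoding:
In [ ]:
# One indicator column per season, no artificial ordering
season_dummies = pd.get_dummies(matches_df['season'], prefix='season')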
In [27]:
# Scale features
scaled_features = StandardScaler(with_mean=True, with_std=True).fit_transform(ml_matches_df.values)
scaled_features_df = pd.DataFrame(scaled_features, index=ml_matches_df.index, columns=ml_matches_df.columns)
def remap_match_result(x):
    # Standardizing spreads the three encoded classes apart; for this class
    # balance, thresholds at +/-0.5 separate them cleanly
    if x < -0.5:
        return -1
    elif x < 0.5:
        return 0
    else:
        return 1
scaled_features_df['RESULT'] = scaled_features_df['RESULT'].map(remap_match_result)
scaled_features_df.head()
Out[27]:
In [22]:
# machine learning models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
In [23]:
# Split data into train and test sets
def split_data(data, targ):
    # Set target for training
    target = data[targ]
    from sklearn.model_selection import train_test_split
    # Split into train (3/4 of the data) and test (1/4 of the data)
    return train_test_split(data, target, train_size=0.75, random_state=0)
In [24]:
def regr_equation(logreg, train, target):
    # Build a printable 'y = b0 + b1*x1 + ...' string from a fitted model
    if type(logreg) is LogisticRegression:
        coef = logreg.coef_[0]
        intercept = "{:.2f}".format(logreg.intercept_[0])
    else:
        coef = logreg.coef_
        intercept = "{:.2f}".format(logreg.intercept_)
    output = target.name + ' = ' + str(intercept) + ' + '
    coeff_df = pd.DataFrame(train.columns)
    coeff_df.columns = ['Feature']
    coeff_df['Correlation'] = pd.Series(coef)
    features = coeff_df['Feature'].tolist()
    coefficients = coeff_df['Correlation'].tolist()
    for coeff, feature in zip(coefficients, features):
        coeff_str = "{:.2f}".format(coeff)
        output += coeff_str + '*' + str(feature) + ' + '
    return output[:-3]
In [25]:
def confusion_matrix(model, X, y):
    # Local import: inside this helper, confusion_matrix is sklearn's function
    from sklearn.metrics import confusion_matrix
    return confusion_matrix(y, model.predict(X))
In [26]:
def test_model(model, data, target, submission_name=None, test=None):
    from sklearn.metrics import accuracy_score, confusion_matrix, r2_score
    if test is None:
        train, test, target_train, target_test = split_data(data, target)
    train = train.drop('RESULT', axis=1)
    test = test.drop('RESULT', axis=1)
    model.fit(train, target_train)
    # Calculate parameters
    if type(model) is LogisticRegression or type(model) is LinearRegression:
        function_str = regr_equation(model, train, target_train)
    else:
        function_str = 'NA'
    if type(model) is not LinearRegression:
        trainset_acc = round(accuracy_score(target_train, model.predict(train)) * 100, 2)
        testset_acc = round(accuracy_score(target_test, model.predict(test)) * 100, 2)
        conf_matrix = confusion_matrix(target_train, model.predict(train))
        # NB: RESULT has three classes, so the matrix is 3x3; this string
        # reports only its top-left 2x2 block
        conf_matrix = 'TN: ' + str(conf_matrix[0][0]) + ', FP: ' + str(conf_matrix[0][1]) + \
                      ', FN: ' + str(conf_matrix[1][0]) + ', TP: ' + str(conf_matrix[1][1])
    else:
        trainset_acc = 'NA'
        testset_acc = 'NA'
        conf_matrix = 'NA'
    r2 = r2_score(target_train, model.predict(train))
    # Perform k-fold cross-validation with 5 folds
    from sklearn.model_selection import KFold
    kf = KFold(n_splits=5)
    error = []
    predictors = list(data.columns.values)
    del predictors[-1]  # drop RESULT (the last column)
    for train_idx, test_idx in kf.split(data):
        # Filter training data for this fold
        train_predictors = data[predictors].iloc[train_idx, :]
        # The target we're using to train the algorithm
        train_target = data['RESULT'].iloc[train_idx]
        # Train on the fold and record its held-out score
        model.fit(train_predictors, train_target)
        error.append(model.score(data[predictors].iloc[test_idx, :], data['RESULT'].iloc[test_idx]))
    # Fit the model again on the full training split so it can be referred to later
    model.fit(train, target_train)
    # Print summary
    print('-' * 40)
    print('Submission name:', submission_name)
    print('Regression function:\n', function_str)
    print('Accuracy on train set:', trainset_acc, '%')
    print('Accuracy on test set:', testset_acc, '%')
    print('Cross-Validation Score : %s' % '{0:.3%}'.format(np.mean(error)))
    print('R2 score:', r2)
    print('Confusion matrix:\n', conf_matrix)
    return [submission_name, function_str, str(trainset_acc), str(testset_acc), r2, conf_matrix, np.mean(error)]
In [28]:
#Setup
suffix = '_droppedMeaningless_noScaler_addedParams_playerOverallRating'
modelMLP = MLPClassifier(hidden_layer_sizes=(104, 104, 104), activation='logistic', solver='sgd', alpha=0.0001,
batch_size='auto', learning_rate='adaptive', learning_rate_init=1, power_t=0.5,
max_iter=2000, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False,
momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,
beta_1=0.9, beta_2=0.999, epsilon=1e-08)
modelLogReg = LogisticRegression(n_jobs = -1)
modelLinReg = LinearRegression()
modelSVC = SVC()
modellinSVC = LinearSVC()
modelKN = KNeighborsClassifier(n_neighbors = 3)
modelGNB = GaussianNB()
modelPercp = Perceptron()
modelSGD = SGDClassifier()
# (min_impurity_split and presort below are from older scikit-learn;
# newer versions use min_impurity_decrease and drop presort)
modelTree = DecisionTreeClassifier(criterion='gini', splitter='random', max_depth=None,
                                   min_samples_split=16, min_samples_leaf=8, min_weight_fraction_leaf=0.0,
                                   max_features='sqrt', random_state=None, max_leaf_nodes=None,
                                   min_impurity_split=1e-07, class_weight=None, presort=False)
modelRndForest = RandomForestClassifier(n_estimators=500, criterion='gini', max_depth=None, min_samples_split=2,
min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt',
max_leaf_nodes=None, min_impurity_split=1e-07, bootstrap=True, oob_score=False,
n_jobs=-1, random_state=None, verbose=0, warm_start=False, class_weight=None)
modelXTree = ExtraTreesClassifier(n_estimators=500, criterion='gini', max_depth=None, min_samples_split=16,
min_samples_leaf=5, min_weight_fraction_leaf=0.0, max_features='sqrt',
max_leaf_nodes=None, min_impurity_split=1e-07, bootstrap=False, oob_score=False,
n_jobs=-1, random_state=None, verbose=0, warm_start=False, class_weight=None)
models = {
    modelRndForest: 'RandomForest',
    modelLogReg: 'logreg',
    modelXTree: 'xTrees'
}
In [30]:
outputs = []
for model, prefix in models.items():
    name = prefix + suffix
    outputs.append(test_model(model, ml_matches_df, target='RESULT', submission_name=name))
outputs.sort(key=lambda x: x[0])
In [31]:
for out in outputs:
    super_table.append(out)
In [30]:
make_table(super_table)
apply_theme('basic')
Out[30]:
In [35]:
df = pd.DataFrame(super_table[1:], columns=super_table[0])
df.to_csv(r'C:/Python/Soccer/out/results_table.csv',
index=False)