IMPORT & LOAD


In [26]:
# Import libraries
# SQL
import sqlite3
# Data manipulation
import numpy as np
import pandas as pd
# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno   # missing-value (NaN) visualization
# Nice tables
from ipy_table import *

# Import and suppress warnings
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

%matplotlib inline

In [29]:
#load data (make sure you have downloaded database.sqlite)
with sqlite3.connect(r'C:/Python/Soccer1/database.sqlite') as con:
    country_df = pd.read_sql_query("SELECT * FROM Country", con)
    matches_df = pd.read_sql_query("SELECT * FROM Match", con)
    league_df = pd.read_sql_query("SELECT * FROM League", con)
    team_df = pd.read_sql_query("SELECT * FROM Team", con)
    player_df = pd.read_sql_query("SELECT * FROM Player", con)
    player_stats_df = pd.read_sql_query("SELECT * FROM Player_Attributes", con)
    
#load past results
super_table=[["Name", 'Regression function', "Train Acc", 'Validation Acc', "r2_score", "conf_matrix", 'Cross-Valid.'],]
df_load = pd.read_csv(r'C:/Python/Soccer/out/results_table.csv')
for row in df_load.values.tolist():
    super_table.append(row)

In [3]:
player_vector = ['home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11']

# Attach each player's overall rating from player_stats_df to the match rows
for player in player_vector:
    player_stat = pd.merge(matches_df, player_stats_df, left_on=player,
                           right_on="player_api_id")
    matches_df[player + '_overall_rating'] = player_stat['overall_rating'].copy()
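Note that Player_Attributes holds several dated rating snapshots per player, and the merged frame above is re-indexed before the column is copied back, so the ratings attach by index alignment. A minimal alternative sketch (an assumption about the intent, not the notebook's original approach) maps each player id to his most recent overall rating instead:

# Alternative sketch (assumption): use each player's most recent rating snapshot
latest_rating = (player_stats_df.sort_values('date')
                                .drop_duplicates('player_api_id', keep='last')
                                .set_index('player_api_id')['overall_rating'])
for player in player_vector:
    matches_df[player + '_overall_rating'] = matches_df[player].map(latest_rating)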

In [4]:
all_df = [country_df, matches_df, league_df, team_df, player_df, player_stats_df]
country_df.dfname = 'country_df'
matches_df.dfname = 'matches_df'
league_df.dfname = 'league_df' 
team_df.dfname = 'team_df' 
player_df.dfname = 'player_df'
player_stats_df.dfname = 'player_stats_df'

DATA EXPLORATION


In [6]:
for df in all_df:
    print(df.dfname)
    print(df.info())
    print('-'*40)


country_df
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 2 columns):
id      11 non-null int64
name    11 non-null object
dtypes: int64(1), object(1)
memory usage: 256.0+ bytes
None
----------------------------------------
matches_df
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25979 entries, 0 to 25978
Columns: 115 entries, id to BSA
dtypes: float64(96), int64(9), object(10)
memory usage: 22.8+ MB
None
----------------------------------------
league_df
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 3 columns):
id            11 non-null int64
country_id    11 non-null int64
name          11 non-null object
dtypes: int64(2), object(1)
memory usage: 344.0+ bytes
None
----------------------------------------
team_df
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 5 columns):
id                  299 non-null int64
team_api_id         299 non-null int64
team_fifa_api_id    288 non-null float64
team_long_name      299 non-null object
team_short_name     299 non-null object
dtypes: float64(1), int64(2), object(2)
memory usage: 11.8+ KB
None
----------------------------------------
player_df
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11060 entries, 0 to 11059
Data columns (total 7 columns):
id                    11060 non-null int64
player_api_id         11060 non-null int64
player_name           11060 non-null object
player_fifa_api_id    11060 non-null int64
birthday              11060 non-null object
height                11060 non-null float64
weight                11060 non-null int64
dtypes: float64(1), int64(4), object(2)
memory usage: 604.9+ KB
None
----------------------------------------

In [5]:
msno.matrix(matches_df)



In [6]:
matches_df.dropna(thresh=80, inplace=True)

In [7]:
msno.matrix(matches_df)



In [9]:
msno.dendrogram(matches_df)



In [11]:
matches_df.hist(figsize = (100, 100))
plt.show()



In [5]:
bookies = ['B365', 'BW', 'IW', 'LB', 'PS', 'WH', 'SJ', 'VC', 'GB', 'BS']
bookies_H = [bookie+'H' for bookie in bookies]
bookies_A = [bookie+'A' for bookie in bookies]
bookies_D = [bookie+'D' for bookie in bookies]
bookies_types = {'Home odds':bookies_H, 'Draw odds':bookies_D, 'Away odds':bookies_A}

In [13]:
for home, draw, away in zip(bookies_H, bookies_D, bookies_A):
    fig, ax = plt.subplots()
    for odds in [home, draw, away]:
        sns.distplot(matches_df[odds].dropna(), ax=ax, label=odds, hist = False)
    #set title
    plt.title(home[:-1], fontsize=16)
    #remove x label
    ax.set_xlabel('')
    ax.set_xlim([0, 8])
    plt.show()



In [14]:
plt.rc('legend',fontsize=30)
#_______All bookmakers - Home/Draw/Away odds | KDE + BOXPLOTS
bookies_types = {'Home odds':bookies_H, 'Draw odds':bookies_D, 'Away odds':bookies_A}
for bookie_type, bookie_list in bookies_types.items():
    fig, axes = plt.subplots(ncols=2, figsize=(30,15))
    axes[0].set_xlim([0, 8])
    if bookie_type=='Home odds':
        axes[0].set_ylim([0, 0.65])
    elif bookie_type =='Draw odds':
        axes[0].set_ylim([0, 2.3])
    else:
        axes[0].set_ylim([0, 0.35])
    for bookie in bookie_list:
        sns.distplot(matches_df[bookie].dropna(), ax = axes[0], label=bookie, hist = False)
    #remove x label
    axes[0].set_xlabel('')
    #locate legend 
    plt.legend(loc='best')
    col_sel = bookie_list
    bookie_sel_df = matches_df[bookie_list]
    axes[1] = sns.boxplot(data=bookie_sel_df, palette='Set2', showmeans=True)
    if bookie_type=='Home odds':
        axes[1].set_ylim([1, 5])
    elif bookie_type =='Draw odds':
        axes[1].set_ylim([1, 10])
    else:
        axes[1].set_ylim([1, 5.5])
    
    plt.suptitle(str(bookie_type), fontsize=80)
    plt.show()



In [15]:
#How many goals - Home/Away
goals_df = matches_df[['home_team_goal', 'away_team_goal']]
color = ['red', 'lime']
fig, ax = plt.subplots()
ax.set_xlim([0, 10])
ax.set_ylim([0, 9500])
sns.distplot(goals_df.dropna(), ax = ax, kde = False, color = color)
plt.show()

#Contingency table
goals_home_vs_away = pd.crosstab(index = matches_df["home_team_goal"],
                                 columns = matches_df["away_team_goal"])
print(goals_home_vs_away)


away_team_goal     0     1     2    3    4   5   6  7  8  9
home_team_goal                                             
0               1678  1580  1026  450  194  58  17  5  3  1
1               2238  2556  1429  640  218  64  17  2  1  0
2               1807  1879  1120  411  144  38  14  1  1  0
3                972  1017   524  230   46  16   7  0  0  0
4                456   412   205   78   26   9   0  0  0  0
5                152   157    62   20    5   2   0  0  0  0
6                 55    52    24    4    1   0   1  0  0  0
7                 20    12     1    3    0   0   0  0  0  0
8                  5     3     1    0    0   0   0  0  0  0
9                  1     2     1    0    0   0   0  0  0  0
10                 1     0     1    0    0   0   0  0  0  0

FEATURE ENGINEERING


In [6]:
#preprocessing libraries
from sklearn.preprocessing import LabelEncoder, StandardScaler, Imputer, OneHotEncoder

In [18]:
def impute_numeric(dataset, formula):
    """
    Impute missing numeric values in a dataset using linear regression.
    dataset: pandas DataFrame
    formula: patsy-style formula, e.g. 'Y ~ X1 + X2'
    """
    import statsmodels.formula.api as smf
    import pandas as pd
    
    lm = smf.ols(formula = formula, data = dataset)
    res = lm.fit()
    
    temp_train = dataset[pd.isnull(dataset).any(axis=1)].copy()
    temp_train = temp_train.drop(formula.split(None, 1)[0], axis=1).copy()
    
    var_pred = res.predict(temp_train)
    var_pred = var_pred.round(decimals=0)
    
    dataset[formula.split(None, 1)[0]].fillna(var_pred, inplace=True)
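
The helper is not exercised elsewhere in this section; here is a minimal, self-contained usage sketch on toy data (purely illustrative, not part of the original analysis):

# Toy illustration of impute_numeric: fill missing y values from a linear fit on x
toy = pd.DataFrame({'x': [1, 2, 3, 4, 5], 'y': [2.0, 4.1, np.nan, 8.2, np.nan]})
impute_numeric(toy, 'y ~ x')
print(toy)   # the two missing y values are replaced by rounded OLS predictions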

In [7]:
def label_win(row):
    if row['home_team_goal'] > row['away_team_goal']:
        return 'WIN'
    elif row['home_team_goal'] == row['away_team_goal']:
        return 'DRAW'
    else:
        return 'LOSE'

matches_df['RESULT'] = matches_df.apply(label_win, axis=1)

In [8]:
ml_matches_df = matches_df[[ 'country_id', 'league_id', 'season', 'stage', 'date',
       'home_team_api_id', 'away_team_api_id', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'B365H', 'B365D', 'B365A', 'BWH',
       'BWD', 'BWA', 'IWH', 'IWD', 'IWA', 'LBH', 'LBD', 'LBA', 'PSH',
       'PSD', 'PSA', 'WHH', 'WHD', 'WHA', 'SJH', 'SJD', 'SJA', 'VCH',
       'VCD', 'VCA', 'GBH', 'GBD', 'GBA', 'BSH', 'BSD', 'BSA', 'home_player_1_overall_rating', 'home_player_2_overall_rating',
       'home_player_3_overall_rating', 'home_player_4_overall_rating',
       'home_player_5_overall_rating', 'home_player_6_overall_rating',
       'home_player_7_overall_rating', 'home_player_8_overall_rating',
       'home_player_9_overall_rating', 'home_player_10_overall_rating',
       'home_player_11_overall_rating', 'away_player_1_overall_rating',
       'away_player_2_overall_rating', 'away_player_3_overall_rating',
       'away_player_4_overall_rating', 'away_player_5_overall_rating',
       'away_player_6_overall_rating', 'away_player_7_overall_rating',
       'away_player_8_overall_rating', 'away_player_9_overall_rating',
       'away_player_10_overall_rating', 'away_player_11_overall_rating', 'RESULT']]

In [9]:
#Drop rows with NaNs in these columns (each column has fewer than 300 missing records)
ml_matches_df.dropna(subset = ['B365H', 'BWH', 'IWH', 'WHH', 'VCH', 'LBH', 'home_team_api_id', 'away_team_api_id', 
        'home_player_X1', 'home_player_X2', 'home_player_X3', 'home_player_X4',
        'home_player_X5', 'home_player_X6', 'home_player_X7', 'home_player_X8', 
        'home_player_X9', 'home_player_X10', 'home_player_X11', 'away_player_X1', 
        'away_player_X2', 'away_player_X3', 'away_player_X4', 'away_player_X5',
        'away_player_X6', 'away_player_X7', 'away_player_X8', 'away_player_X9', 
        'away_player_X10', 'away_player_X11', 'home_player_Y1', 'home_player_Y2', 
        'home_player_Y3', 'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
        'home_player_Y7', 'home_player_Y8', 'home_player_Y9', 'home_player_Y10', 
        'home_player_Y11', 'away_player_Y1', 'away_player_Y2', 'away_player_Y3', 
        'away_player_Y4', 'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
        'away_player_Y8', 'away_player_Y9', 'away_player_Y10', 'away_player_Y11', 
        'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
        'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
        'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
        'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
        'away_player_8', 'away_player_9', 'away_player_10','away_player_11',
        'home_player_1', 'home_player_2', 'home_player_1_overall_rating', 'home_player_2_overall_rating',
       'home_player_3_overall_rating', 'home_player_4_overall_rating',
       'home_player_5_overall_rating', 'home_player_6_overall_rating',
       'home_player_7_overall_rating', 'home_player_8_overall_rating',
       'home_player_9_overall_rating', 'home_player_10_overall_rating',
       'home_player_11_overall_rating', 'away_player_1_overall_rating',
       'away_player_2_overall_rating', 'away_player_3_overall_rating',
       'away_player_4_overall_rating', 'away_player_5_overall_rating',
       'away_player_6_overall_rating', 'away_player_7_overall_rating',
       'away_player_8_overall_rating', 'away_player_9_overall_rating',
       'away_player_10_overall_rating', 'away_player_11_overall_rating'
                            ], how='any', inplace=True)

In [13]:
ml_matches_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 18533 entries, 145 to 24556
Columns: 126 entries, country_id to RESULT
dtypes: float64(118), int64(5), object(3)
memory usage: 18.0+ MB

In [10]:
#Impute missing odds for less popular bookmakers with the row-wise mean of the other bookmakers' odds
bookies_types = {'Home odds':bookies_H, 'Draw odds':bookies_D, 'Away odds':bookies_A}
for bookie_type, bookie_list in bookies_types.items():
    bookie_sel_df = ml_matches_df[bookie_list]
    bookie_sel_df = bookie_sel_df.apply(lambda x: x.fillna(x.mean()),axis=1)
    ml_matches_df[bookie_list]=bookie_sel_df.copy()

In [11]:
msno.matrix(ml_matches_df)


Feature pre-selection


In [13]:
matches_data = ml_matches_df.drop('RESULT', 1)
matches_target = ml_matches_df[['RESULT']]

In [14]:
ml_matches_df.shape


Out[14]:
(18533, 126)

In [15]:
matches_data.shape


Out[15]:
(18533, 125)

In [16]:
matches_target.shape


Out[16]:
(18533, 1)

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(matches_data, np.ravel(matches_target), test_size = 0.20, random_state = 101)

In [31]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

clf = RandomForestClassifier(n_estimators=50, max_features='sqrt')
clf = clf.fit(X_train, y_train)

Features based on tree


In [32]:
features_tree = pd.DataFrame()
features_tree['feature'] = matches_data.columns
features_tree['importance'] = clf.feature_importances_
features_tree.sort_values(by=['importance'], ascending=True, inplace=True)
features_tree.set_index('feature', inplace=True)

Features based on chi-square


In [18]:
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest
features_chi = pd.DataFrame()
features_chi['feature'] = matches_data.columns
#model = SelectKBest(chi2, k=125)
#fit = model.fit(X_train, y_train)
#np.set_printoptions(precision=3)
#print(fit.scores_)
features_chi2 = chi2(X_train, y_train)

In [19]:
features_chi['importance'] = features_chi2[0]

In [20]:
features_chi.sort_values(by=['importance'], ascending=True, inplace=True)
features_chi.set_index('feature', inplace=True)

In [25]:
features_chi.plot(kind='barh', figsize=(20, 20))


Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x2c512010fd0>

In [34]:
features_tree.plot(kind='barh', figsize=(20, 20))


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x17528a2eac8>

Reducing dataset to only relevant features


In [22]:
X_train_reduced = model.transform(X_train)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-9570a5f7d736> in <module>()
----> 1 X_train_reduced = model.transform(X_train)

NameError: name 'model' is not defined
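
The NameError comes from the selector never being instantiated (the SelectKBest lines above are commented out). A minimal sketch that would produce the reduced sets used in the next cells, assuming the intent was SelectFromModel around the fitted random forest imported earlier (the original selector and its threshold are not shown):

# Sketch (assumption): keep the features the fitted forest considers important
model = SelectFromModel(clf, prefit=True)   # clf was fitted in the cell above
X_train_reduced = model.transform(X_train)
X_test_reduced = model.transform(X_test)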

In [82]:
X_train_reduced.shape


Out[82]:
(14826, 73)

In [83]:
X_test_reduced = model.transform(X_test)

In [84]:
X_test_reduced.shape


Out[84]:
(3707, 73)

In [85]:
from sklearn.linear_model import LogisticRegression

GridSearch


In [86]:
param_grid = {'C': [0.1, 1, 10, 100, 1000]}

In [87]:
from sklearn.model_selection import GridSearchCV

In [88]:
grid = GridSearchCV(LogisticRegression(), param_grid, refit=True, verbose = 3)

In [89]:
grid.fit(X_train_reduced, y_train)


Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV] C=0.1 ...........................................................
[CV] ............................ C=0.1, score=0.509911, total=   4.6s
[CV] C=0.1 ...........................................................
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.6s remaining:    0.0s
[CV] ............................ C=0.1, score=0.526816, total=   4.7s
[CV] C=0.1 ...........................................................
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    9.4s remaining:    0.0s
[CV] ............................ C=0.1, score=0.529245, total=   4.6s
[CV] C=1 .............................................................
[CV] .............................. C=1, score=0.520429, total=   4.8s
[CV] C=1 .............................................................
[CV] .............................. C=1, score=0.529245, total=   5.2s
[CV] C=1 .............................................................
[CV] .............................. C=1, score=0.504351, total=   3.1s
[CV] C=10 ............................................................
[CV] ............................. C=10, score=0.494539, total=   3.8s
[CV] C=10 ............................................................
[CV] ............................. C=10, score=0.528840, total=   6.4s
[CV] C=10 ............................................................
[CV] ............................. C=10, score=0.525400, total=   4.2s
[CV] C=100 ...........................................................
[CV] ............................ C=100, score=0.512136, total=   4.8s
[CV] C=100 ...........................................................
[CV] ............................ C=100, score=0.526614, total=   5.7s
[CV] C=100 ...........................................................
[CV] ............................ C=100, score=0.517102, total=   4.2s
[CV] C=1000 ..........................................................
[CV] ........................... C=1000, score=0.521036, total=   4.5s
[CV] C=1000 ..........................................................
[CV] ........................... C=1000, score=0.518519, total=   4.9s
[CV] C=1000 ..........................................................
[CV] ........................... C=1000, score=0.519126, total=   3.7s
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:  1.2min finished
Out[89]:
GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.1, 1, 10, 100, 1000]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score=True, scoring=None, verbose=3)

In [90]:
grid.best_params_


Out[90]:
{'C': 0.1}

In [91]:
grid.best_estimator_


Out[91]:
LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [92]:
grid_predictions = grid.predict(X_test_reduced)

In [95]:
from sklearn.metrics import classification_report, confusion_matrix

In [96]:
print(confusion_matrix(y_test, grid_predictions))


[[  11  136  734]
 [  28  333  742]
 [  10  117 1596]]

In [97]:
print(classification_report(y_test, grid_predictions))


             precision    recall  f1-score   support

        0.0       0.22      0.01      0.02       881
        1.0       0.57      0.30      0.39      1103
        2.0       0.52      0.93      0.67      1723

avg / total       0.46      0.52      0.43      3707


In [12]:
#Label-encode categorical (object) columns
le = LabelEncoder()

for col in ml_matches_df.columns.values:
    # Encode only the categorical variables
    if ml_matches_df[col].dtypes == 'object':
        # Fit on the full column to get an exhaustive list of levels
        le.fit(ml_matches_df[col].values)
        ml_matches_df[col] = le.transform(ml_matches_df[col])

ml_matches_df['RESULT'] = ml_matches_df['RESULT'].astype(float)
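
LabelEncoder assigns codes in sorted label order, so RESULT is encoded as DRAW=0, LOSE=1, WIN=2. A quick illustrative check (assuming RESULT is the last object column encoded above, so le still holds that fit):

# Illustrative check of the RESULT mapping learned by the last-fitted encoder
print(dict(zip(le.classes_, le.transform(le.classes_))))   # {'DRAW': 0, 'LOSE': 1, 'WIN': 2}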

In [27]:
#Scale features (this also standardizes the label-encoded RESULT column, which is mapped back below)
scaled_features = StandardScaler(with_mean=True, with_std=True).fit_transform(ml_matches_df.values)
scaled_features_df = pd.DataFrame(scaled_features, index=ml_matches_df.index, columns=ml_matches_df.columns)

# RESULT was label-encoded (DRAW/LOSE/WIN -> 0/1/2) and then standardized;
# thresholding at +/-0.5 recovers the three classes as -1/0/1
def remap_match_result(x):
    if x < -0.5:
        return -1
    elif x < 0.5:
        return 0
    else:
        return 1
scaled_features_df['RESULT'] = scaled_features_df['RESULT'].map(remap_match_result)

scaled_features_df.head()


Out[27]:
country_id league_id season stage date home_team_api_id away_team_api_id home_player_X1 home_player_X2 home_player_X3 home_player_X4 home_player_X5 home_player_X6 home_player_X7 home_player_X8 home_player_X9 home_player_X10 home_player_X11 away_player_X1 away_player_X2 away_player_X3 away_player_X4 away_player_X5 away_player_X6 away_player_X7 away_player_X8 away_player_X9 away_player_X10 away_player_X11 home_player_Y1 home_player_Y2 home_player_Y3 home_player_Y4 home_player_Y5 home_player_Y6 home_player_Y7 home_player_Y8 home_player_Y9 home_player_Y10 home_player_Y11 away_player_Y1 away_player_Y2 away_player_Y3 away_player_Y4 away_player_Y5 away_player_Y6 away_player_Y7 away_player_Y8 away_player_Y9 away_player_Y10 away_player_Y11 home_player_1 home_player_2 home_player_3 home_player_4 home_player_5 home_player_6 home_player_7 home_player_8 home_player_9 home_player_10 home_player_11 away_player_1 away_player_2 away_player_3 away_player_4 away_player_5 away_player_6 away_player_7 away_player_8 away_player_9 away_player_10 away_player_11 B365H B365D B365A BWH BWD BWA IWH IWD IWA LBH LBD LBA PSH PSD PSA WHH WHD WHA SJH SJD SJA VCH VCD VCA GBH GBD GBA BSH BSD BSA RESULT
145 -1.440532 -1.440532 -1.677216 0.515898 -1.559993 -0.135024 -0.001675 0.0 -0.198326 -0.154246 -0.115885 0.28336 -1.00501 -0.741363 0.401581 1.127873 -0.949555 0.265198 0.0 -0.182678 -0.144603 -0.116614 0.288303 -0.97221 -0.691287 0.41015 1.143707 -0.97159 0.285025 0.0 0.0 -0.00715 -0.01599 -0.256033 0.721283 0.575381 -0.401031 -0.941504 0.722798 -0.886236 0.0 0.0 0.0 -0.00715 -0.256775 0.715858 0.560073 -0.414719 -0.936542 0.754508 -0.917742 -0.429893 -0.337037 -0.224224 -0.246484 -0.635638 -0.578564 0.497046 -0.334056 0.459801 -0.586835 -0.694314 -0.435937 -0.602667 0.580849 0.112343 0.346244 -0.575405 -0.495636 -0.848021 -0.589961 -0.650718 -0.588404 0.196088 -0.406710 -0.632078 0.226035 -0.494900 -0.665279 0.214631 -0.649964 -0.681214 0.117349 -0.475849 -0.643504 0.082688 -0.538962 -0.626417 0.011502 -0.498430 -0.607742 0.240605 -0.567410 -0.619835 -0.020894 -0.538450 -0.578251 0.177566 -0.495606 -0.628619 0.118295 -0.551762 -0.635042 1
153 -1.440532 -1.440532 -1.677216 0.611261 -1.543611 0.002016 -0.125486 0.0 -0.198326 -0.154246 -0.115885 0.28336 -1.00501 -0.741363 0.401581 1.127873 -0.949555 0.265198 0.0 -0.182678 -0.144603 -0.116614 0.288303 -0.97221 -0.691287 0.41015 1.143707 -0.97159 0.285025 0.0 0.0 -0.00715 -0.01599 -0.256033 0.721283 0.575381 -0.401031 -0.941504 0.722798 -0.886236 0.0 0.0 0.0 -0.00715 -0.256775 0.715858 0.560073 -0.414719 -0.936542 0.754508 -0.917742 -0.447044 -0.615512 -0.525151 -0.554597 -0.043088 -0.610629 -0.540628 -0.597685 -0.621749 -0.602553 -0.212502 -0.435331 -0.749476 -0.773347 -0.544503 -0.629337 -0.568294 -0.529328 -0.597484 -0.502916 -0.697882 -0.591531 0.085983 -0.584174 -0.613425 0.316373 -0.734249 -0.665279 0.214631 -0.649964 -0.681214 0.388182 -0.525052 -0.701889 0.129084 -0.586560 -0.636187 0.348779 -0.498430 -0.690366 0.240605 -0.521008 -0.633148 -0.020894 -0.578092 -0.578251 0.177566 -0.543462 -0.628619 0.013545 -0.551762 -0.598635 0
155 -1.440532 -1.440532 -1.677216 0.611261 -1.545951 -0.101783 -0.000697 0.0 -0.198326 -0.154246 -0.115885 0.28336 -1.00501 -0.741363 0.401581 1.127873 -0.949555 0.265198 0.0 -0.182678 -0.144603 -0.116614 0.288303 -0.97221 -0.691287 0.41015 1.143707 -0.97159 0.285025 0.0 0.0 -0.00715 -0.01599 -0.256033 0.721283 0.575381 -0.401031 -0.941504 0.722798 -0.886236 0.0 0.0 0.0 -0.00715 -0.256775 0.715858 0.560073 -0.414719 -0.936542 0.754508 -0.917742 -0.474114 -0.603428 -0.627898 -0.790138 -0.624390 -0.628015 -0.536527 -0.594661 -0.621526 -0.587043 -0.596329 -0.436360 -0.606293 -0.513894 -0.544197 -0.135424 -0.569114 -0.544358 0.206842 0.165346 -0.601101 0.259737 -0.684755 0.569343 0.886835 -0.737574 0.223145 1.386640 -0.771947 0.462000 0.987423 -0.719225 0.262203 0.757756 -0.690371 0.220883 0.758224 -0.721204 0.331609 0.686701 -0.709306 0.639040 1.030942 -0.710796 0.333665 0.713752 -0.729034 0.538083 1.156349 -0.719706 0.405835 0.695220 1
162 -1.440532 -1.440532 -1.677216 0.706624 -1.538930 -0.135024 -0.103433 0.0 -0.198326 -0.154246 -0.115885 0.28336 -1.00501 -0.741363 0.401581 1.127873 -0.949555 0.265198 0.0 -0.182678 -0.144603 -0.116614 0.288303 -0.97221 -0.691287 0.41015 1.143707 -0.97159 0.285025 0.0 0.0 -0.00715 -0.01599 -0.256033 0.721283 0.575381 -0.401031 -0.941504 0.722798 -0.886236 0.0 0.0 0.0 -0.00715 -0.256775 0.715858 0.560073 -0.414719 -0.936542 0.754508 -0.917742 -0.429893 -0.337037 -0.224224 -0.537152 -0.364475 -0.572033 -0.258451 0.372623 0.459801 -0.633408 -0.694314 -0.475463 -0.601821 -0.509075 -0.609070 0.491585 -0.632340 -0.540622 -0.597078 -0.622279 -0.600904 -0.604069 1.021879 -0.229246 -0.778640 1.279982 -0.447031 -0.816156 1.167190 -0.526413 -0.855030 0.869664 -0.328238 -0.783630 0.895184 -0.435111 -0.761810 1.005888 -0.394675 -0.772990 1.196138 -0.288998 -0.771600 0.847871 -0.498808 -0.715455 1.113411 -0.352039 -0.786963 0.933018 -0.456002 -0.775070 1
168 -1.440532 -1.440532 -1.677216 0.706624 -1.536590 0.003247 -0.000772 0.0 -0.198326 -0.154246 -0.115885 0.28336 -1.00501 -0.741363 0.401581 1.127873 -0.949555 0.265198 0.0 -0.182678 -0.144603 -0.116614 0.288303 -0.97221 -0.691287 0.41015 1.143707 -0.97159 0.285025 0.0 0.0 -0.00715 -0.01599 -0.256033 0.721283 0.575381 -0.401031 -0.941504 0.722798 -0.886236 0.0 0.0 0.0 -0.00715 -0.256775 0.715858 0.560073 -0.414719 -0.936542 0.754508 -0.917742 -0.434801 -0.607952 -0.524383 -0.546104 -0.628794 -0.564939 -0.540402 0.225332 0.178055 -0.591996 0.265646 -0.431580 -0.603077 -0.727231 0.012434 -0.133170 -0.496828 -0.650817 -0.637746 0.454519 -0.603662 -0.635590 -0.492071 -0.317978 0.020788 -0.587010 -0.207682 0.194716 -0.567828 -0.279310 0.014051 -0.568762 -0.475849 0.173898 -0.530308 -0.374531 -0.018961 -0.540935 -0.290920 -0.048653 -0.535062 -0.288998 0.099052 -0.531932 -0.340242 -0.086604 -0.553563 -0.256327 0.148706 -0.521845 -0.360243 -0.052527 -1

MODEL LEARNING

IMPORTS AND FUNCTIONS


In [22]:
# machine learning models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression 
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

In [23]:
#Split data into train and test sets
def split_data(data, targ):
    #set target for training
    target = data[targ]

    # Import the train_test_split function
    from sklearn.model_selection import train_test_split
    # Split data into train (3/4 of the data) and test (1/4 of the data)
    return train_test_split(data, target, train_size=0.75, random_state=0)

In [24]:
def regr_equation(logreg, train, target):
    # Build a printable linear formula from a fitted (logistic/linear) regression model
    if type(logreg) is LogisticRegression:
        coef = logreg.coef_[0]
        intercept = "{:.2f}".format(logreg.intercept_[0])
    else:
        coef = logreg.coef_
        intercept = "{:.2f}".format(logreg.intercept_)
        
    output = target.name + ' = ' + str(intercept) + ' + '
    coeff_df = pd.DataFrame(train.columns.delete(0))
    coeff_df.columns = ['Feature']
    coeff_df["Correlation"] = pd.Series(logreg.coef_[0])
    features = coeff_df['Feature'].tolist()
    coefficients = coeff_df['Correlation'].tolist()
    
    for coeff, feature in zip(coefficients, features):
        coeff_str = "{:.2f}".format(coeff)
        output += coeff_str + "*" + str(feature) + " + "
    return output[:-3]

In [25]:
def confusion_matrix(model, X, y):
    from sklearn.metrics import confusion_matrix
    return confusion_matrix(y, model.predict(X))

In [26]:
def test_model(model, data, target, submission_name=None, test=None):
   
    if test is None:
        from sklearn.metrics import accuracy_score, confusion_matrix, r2_score
        train, test, target_train, target_test = split_data(data, target)
        train = train.drop('RESULT', 1)
        test = test.drop('RESULT', 1)
        model.fit(train, target_train)
        #Calc parameters
        if type(model) is LogisticRegression:
            function_str = regr_equation(model, train, target_train)
        elif type(model) is LinearRegression:
            function_str = regr_equation(model, train, target_train)
        else :
            function_str = "NA"    

        if type(model) is not LinearRegression:
            trainset_acc = round(accuracy_score(target_train, model.predict(train)) * 100, 2)
            testset_acc = round(accuracy_score(target_test, model.predict(test)) * 100, 2)
            conf_matrix = confusion_matrix(target_train, model.predict(train))
            conf_matrix = 'TN: '+str(conf_matrix[0][0])+', FP: '+str(conf_matrix[0][1])+ \
                              ', FN: '+str(conf_matrix[1][0])+', TP: '+str(conf_matrix[1][1])
        else:
            trainset_acc = 'NA'
            testset_acc = 'NA'
            conf_matrix = "NA"

        r2_score = r2_score(target_train, model.predict(train))
        kaggle = "not_tested"
        
        #Perform k-fold cross-validation with 5 folds
        from sklearn.cross_validation import KFold   #For K-fold cross validation
        kf = KFold(data.shape[0], n_folds=5)
        error = []
        predictors = list(data.columns.values)
        del predictors[-1]
        for train, test in kf:
            # Filter training data
            train_predictors = (data[predictors].iloc[train,:])

            # The target we're using to train the algorithm.
            train_target = data['RESULT'].iloc[train]

            # Training the algorithm using the predictors and target.
            model.fit(train_predictors, train_target)

            #Record error from each cross-validation run
            error.append(model.score(data[predictors].iloc[test,:], data['RESULT'].iloc[test]))

        #Note: after cross-validation the model remains fitted on the last fold
        #prints
        print("-"*40)
        print('Submission name:', submission_name )
        print('Regression function:\n', function_str)
        print('Accuracy on train set:', trainset_acc,"%")
        print('Accuracy on test set:', testset_acc,"%")
        print ("Cross-Validation Score : %s" % "{0:.3%}".format(np.mean(error)))
        print("R2 score:", r2_score)
        print("Confusion matrix:\n", conf_matrix)
        return [submission_name, function_str, str(trainset_acc), str(testset_acc), r2_score, conf_matrix, np.mean(error)]

SETUP


In [28]:
#Setup
suffix = '_droppedMeaningless_noScaler_addedParams_playerOverallRating'
modelMLP = MLPClassifier(hidden_layer_sizes=(104, 104, 104), activation='logistic', solver='sgd', alpha=0.0001, 
                         batch_size='auto', learning_rate='adaptive', learning_rate_init=1, power_t=0.5, 
                         max_iter=2000, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, 
                         momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, 
                         beta_1=0.9, beta_2=0.999, epsilon=1e-08)
modelLogReg = LogisticRegression(n_jobs = -1)
modelLinReg = LinearRegression()
modelSVC = SVC()
modellinSVC = LinearSVC()
modelKN = KNeighborsClassifier(n_neighbors = 3)
modelGNB = GaussianNB()
modelPercp = Perceptron()
modelSGD = SGDClassifier()
modelTree = DecisionTreeClassifier(criterion='gini', splitter='random', max_depth=None, 
                                   min_samples_split=16, min_samples_leaf=8, min_weight_fraction_leaf=0.0, 
                                   max_features='sqrt', random_state=None, max_leaf_nodes=None, 
                                   min_impurity_split=1e-07, class_weight=None, presort=False)
modelRndForest = RandomForestClassifier(n_estimators=500, criterion='gini', max_depth=None, min_samples_split=2, 
                                        min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', 
                                        max_leaf_nodes=None, min_impurity_split=1e-07, bootstrap=True, oob_score=False,
                                        n_jobs=-1, random_state=None, verbose=0, warm_start=False, class_weight=None)

modelXTree = ExtraTreesClassifier(n_estimators=500, criterion='gini', max_depth=None, min_samples_split=16, 
                                  min_samples_leaf=5, min_weight_fraction_leaf=0.0, max_features='sqrt', 
                                  max_leaf_nodes=None, min_impurity_split=1e-07, bootstrap=False, oob_score=False, 
                                  n_jobs=-1, random_state=None, verbose=0, warm_start=False, class_weight=None)

models = {
          modelRndForest: "RandomForest",
          modelLogReg : "logreg",
          modelXTree : 'xTrees'
         }

In [30]:
outputs = []
for model, prefix in models.items():
    name=prefix+suffix
    outputs.append(test_model(model, ml_matches_df, target='RESULT', submission_name=name))
outputs.sort(key=lambda x: x[0])


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
----------------------------------------
Submission name: RandomForest_droppedMeaningless_noScaler_addedParams_playerOverallRating
Regression function:
 NA
Accuracy on train set: 100.0 %
Accuracy on test set: 53.19 %
Cross-Validation Score : 52.587%
R2 score: 1.0
Confusion matrix:
 TN: 3533, FP: 0, FN: 0, TP: 3997
----------------------------------------
Submission name: logreg_droppedMeaningless_noScaler_addedParams_playerOverallRating
Regression function:
 RESULT = -0.00 + -0.00*league_id + -0.00*season + 0.00*stage + -0.00*date + -0.00*home_team_api_id + -0.00*away_team_api_id + 0.00*home_player_X1 + -0.00*home_player_X2 + -0.00*home_player_X3 + -0.00*home_player_X4 + -0.00*home_player_X5 + -0.00*home_player_X6 + -0.00*home_player_X7 + -0.00*home_player_X8 + -0.00*home_player_X9 + -0.00*home_player_X10 + -0.00*home_player_X11 + -0.00*away_player_X1 + -0.00*away_player_X2 + -0.00*away_player_X3 + -0.00*away_player_X4 + -0.00*away_player_X5 + -0.00*away_player_X6 + -0.00*away_player_X7 + -0.00*away_player_X8 + -0.00*away_player_X9 + -0.00*away_player_X10 + -0.00*away_player_X11 + -0.00*home_player_Y1 + -0.00*home_player_Y2 + -0.00*home_player_Y3 + -0.00*home_player_Y4 + -0.00*home_player_Y5 + -0.00*home_player_Y6 + -0.00*home_player_Y7 + -0.00*home_player_Y8 + -0.00*home_player_Y9 + -0.00*home_player_Y10 + -0.00*home_player_Y11 + -0.00*away_player_Y1 + -0.00*away_player_Y2 + -0.00*away_player_Y3 + -0.00*away_player_Y4 + -0.00*away_player_Y5 + -0.00*away_player_Y6 + -0.00*away_player_Y7 + -0.00*away_player_Y8 + -0.00*away_player_Y9 + -0.00*away_player_Y10 + -0.00*away_player_Y11 + -0.00*home_player_1 + 0.00*home_player_2 + 0.00*home_player_3 + -0.00*home_player_4 + -0.00*home_player_5 + 0.00*home_player_6 + 0.00*home_player_7 + -0.00*home_player_8 + 0.00*home_player_9 + 0.00*home_player_10 + 0.00*home_player_11 + 0.00*away_player_1 + -0.00*away_player_2 + 0.00*away_player_3 + -0.00*away_player_4 + 0.00*away_player_5 + -0.00*away_player_6 + -0.00*away_player_7 + -0.00*away_player_8 + -0.00*away_player_9 + -0.00*away_player_10 + 0.00*away_player_11 + -0.00*B365H + -0.00*B365D + -0.00*B365A + -0.00*BWH + -0.00*BWD + -0.00*BWA + -0.00*IWH + -0.00*IWD + -0.00*IWA + -0.00*LBH + -0.00*LBD + -0.00*LBA + -0.00*PSH + -0.00*PSD + -0.00*PSA + -0.00*WHH + -0.00*WHD + -0.00*WHA + -0.00*SJH + -0.00*SJD + -0.00*SJA + -0.00*VCH + -0.00*VCD + -0.00*VCA + -0.00*GBH + -0.00*GBD + -0.00*GBA + -0.00*BSH + -0.00*BSD + -0.00*BSA + -0.00*home_player_1_overall_rating + -0.00*home_player_2_overall_rating + -0.00*home_player_3_overall_rating + -0.00*home_player_4_overall_rating + -0.00*home_player_5_overall_rating + -0.00*home_player_6_overall_rating + -0.00*home_player_7_overall_rating + -0.00*home_player_8_overall_rating + -0.00*home_player_9_overall_rating + -0.00*home_player_10_overall_rating + -0.00*home_player_11_overall_rating + -0.00*away_player_1_overall_rating + -0.00*away_player_2_overall_rating + -0.00*away_player_3_overall_rating + -0.00*away_player_4_overall_rating + -0.00*away_player_5_overall_rating + -0.00*away_player_6_overall_rating + -0.00*away_player_7_overall_rating + -0.00*away_player_8_overall_rating + -0.00*away_player_9_overall_rating + -0.00*away_player_10_overall_rating + -0.00*away_player_11_overall_rating
Accuracy on train set: 52.83 %
Accuracy on test set: 54.4 %
Cross-Validation Score : 51.853%
R2 score: -0.555559624099
Confusion matrix:
 TN: 6, FP: 881, FN: 4, TP: 1789
----------------------------------------
Submission name: xTrees_droppedMeaningless_noScaler_addedParams_playerOverallRating
Regression function:
 NA
Accuracy on train set: 84.25 %
Accuracy on test set: 53.17 %
Cross-Validation Score : 53.051%
R2 score: 0.500393247669
Confusion matrix:
 TN: 2412, FP: 298, FN: 2, TP: 3201

In [31]:
for out in outputs:
    super_table.append(out)

In [30]:
make_table(super_table)
apply_theme('basic')


Out[30]:
Name | Regression function | Train Acc | Validation Acc | r2_score | conf_matrix | Cross-Valid.
DTree_firstTry_justdroppedAllThere | nan | 100.0 | 100.0 | 1.0 | TN: 330, FP: 0, FN: 0, TP: 380 | 0.9971639196497554
LogReg_firstTry_justdroppedAllThere | (long linear formula) | 100.0 | 100.0 | 1.0 | TN: 330, FP: 0, FN: 0, TP: 380 | 0.8656563868143188
MLPerc_firstTry_justdroppedAllThere | nan | 69.34 | 48.53 | 0.06720900266300145 | TN: 212, FP: 71, FN: 48, TP: 290 | 0.4472685423641514
RandomForest_firstTry_justdroppedAllThere | nan | 99.92 | 96.6 | 0.9988652177647968 | TN: 330, FP: 0, FN: 0, TP: 379 | 0.7560294875096576
xTrees_firstTry_justdroppedAllThere | nan | 100.0 | 97.28 | 1.0 | TN: 330, FP: 0, FN: 0, TP: 380 | 0.6283109065155807
DTree_firstTry_droppedMeaningless | nan | 100.0 | 100.0 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.4133221310957079
LogReg_firstTry_droppedMeaningless | (long linear formula) | 82.4 | 81.56 | 0.7368259494499363 | TN: 3387, FP: 317, FN: 366, TP: 2231 | 0.5285518044042797
MLPerc_firstTry_droppedMeaningless | nan | 28.98 | 28.34 | -2.028336257123525 | TN: 3498, FP: 203, FN: 3483, TP: 725 | 0.4508442703441989
RandomForest_firstTry_droppedMeaningless | nan | 99.97 | 98.22 | 0.9995922942671572 | TN: 3703, FP: 1, FN: 3, TP: 4231 | 0.4630133052375883
xTrees_firstTry_droppedMeaningless | nan | 100.0 | 99.75 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.4639329496299106
DTree_firstTry_droppedMeaningless_StandardScaler | nan | 100.0 | 100.0 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.4123000804303415
LogReg_firstTry_droppedMeaningless_StandardScaler | (long linear formula) | 100.0 | 99.98 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.5259445469639505
MLPerc_firstTry_droppedMeaningless_StandardScaler | nan | 100.0 | 99.96 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.4587692589904033
RandomForest_firstTry_droppedMeaningless_StandardScaler | nan | 99.97 | 98.0 | 0.9994903678339464 | TN: 3704, FP: 0, FN: 4, TP: 4230 | 0.4734931592400561
xTrees_firstTry_droppedMeaningless_StandardScaler | nan | 100.0 | 98.98 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.4553448810190143
logreg_firstTry_droppedMeaningless_StandardScaler_corr | (long linear formula) | 53.57 | 51.89 | -0.5115690045148893 | TN: 109, FP: 1008, FN: 72, TP: 2038 | 0.5259445469639505
DTree_firstTry_droppedMeaningless_StandardScaler_addedParams | nan | 61.38 | 45.68 | -0.2456429402681364 | TN: 1683, FP: 768, FN: 758, TP: 2404 | 0.4560088886240977
MLPerc_firstTry_droppedMeaningless_StandardScaler_addedParams | nan | 99.99 | 41.57 | 0.9998980735667892 | TN: 3703, FP: 1, FN: 0, TP: 4234 | 0.4634243283230301
RandomForest_firstTry_droppedMeaningless_StandardScaler_addedParams | nan | 100.0 | 51.81 | 1.0 | TN: 3704, FP: 0, FN: 0, TP: 4234 | 0.5266602541891456
xTrees_firstTry_droppedMeaningless_StandardScaler_addedParams | nan | 82.83 | 52.34 | 0.454285876589837 | TN: 2409, FP: 350, FN: 3, TP: 3331 | 0.5280405830912323
logreg_balanced_firstTry_droppedMeaningless_StandardScaler_addedParams | (long linear formula) | 51.68 | 50.13 | -0.5593725016907143 | TN: 799, FP: 1161, FN: 677, TP: 2244 | 0.5049333353192677
RandomForest_droppedMeaningless_noScaler_addedParams_playerOverallRating | nan | 100.0 | 53.19 | 1.0 | TN: 3533, FP: 0, FN: 0, TP: 3997 | 0.5258748380967383
logreg_droppedMeaningless_noScaler_addedParams_playerOverallRating | (long linear formula, printed in full above) | 52.83 | 54.4 | -0.5555596240990914 | TN: 6, FP: 881, FN: 4, TP: 1789 | 0.5185347625610509
xTrees_droppedMeaningless_noScaler_addedParams_playerOverallRating | nan | 84.25 | 53.17 | 0.5003932476692017 | TN: 2412, FP: 298, FN: 2, TP: 3201 | 0.530514956098139
RandomForest_droppedMeaningless_StandardScaler_addedParams_Positions_ResultLabel | nan | 100.0 | 51.61 | 1.0 | Pred Lose/Draw/Win; True Lose: 0.46 0.03 0.51, Draw: 0.27 0.04 0.69, Win: 0.16 0.03 0.82 | 0.523843598262203
logreg_droppedMeaningless_StandardScaler_addedParams_Positions_ResultLabel | (long linear formula) | 53.43 | 51.96 | -0.5359402567508378 | Pred Lose/Draw/Win; True Lose: 0.43 0.01 0.56, Draw: 0.23 0.01 0.76, Win: 0.14 0.00 0.86 | 0.5274214157935088

In [35]:
df = pd.DataFrame(super_table[1:], columns=super_table[0])
df.to_csv(r'C:/Python/Soccer/out/results_table.csv', 
                      index=False)