notebook.community

Edit and run



In [0]:

    
# packages
import pandas as pd
import numpy as np
import datetime
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    import sklearn

# required machine learning packages
from sklearn import model_selection
from sklearn.feature_selection import RFE
from sklearn.metrics import brier_score_loss, roc_auc_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV as CCV

from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
import xgboost as xgb



In [0]:

    
# Code to read csv file into Colaboratory:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)



In [0]:

    
# loading CSV files

data_downloaded = drive.CreateFile({'id': '1bC5TaScWuEGhK1LPyti24xbQ1XGXBhNY'})
data_downloaded.GetContentFile('nfl_elo.csv')
data_downloaded = drive.CreateFile({'id': '1LBnfAOgwEdR3MqDTw5jdhLXmCx6cquzr'})
data_downloaded.GetContentFile('nfl_teams.csv')
data_downloaded = drive.CreateFile({'id': '1Qv02AygOdZvOhGWJwOsrsT550suhffoV'})
data_downloaded.GetContentFile('spreadspoke_scores.csv')


df = pd.read_csv("spreadspoke_scores.csv")
teams = pd.read_csv("nfl_teams.csv")
games_elo = pd.read_csv("nfl_elo.csv")



In [0]:

    
# replacing blank strings with NaN
df = df.replace(r'^\s*$', np.nan, regex=True)

# removing rows from specific columns that have null values, resetting index and changing data types
df = df[(df.score_home.isnull() == False) & (df.team_favorite_id.isnull() == False) & (df.over_under_line.isnull() == False) &
        (df.schedule_season >= 1979)]

df.reset_index(drop=True, inplace=True)
df['over_under_line'] = df.over_under_line.astype(float)

# mapping team_id to the correct teams
df['team_home'] = df.team_home.map(teams.set_index('team_name')['team_id'].to_dict())
df['team_away'] = df.team_away.map(teams.set_index('team_name')['team_id'].to_dict())

# fix team_favorite_id for Colts in 1969 and 1971 SB
df.loc[(df.schedule_season == 1968) & (df.schedule_week == 'Superbowl'), 'team_favorite_id'] = 'IND'
df.loc[(df.schedule_season == 1970) & (df.schedule_week == 'Superbowl'), 'team_favorite_id'] = 'IND'

# creating home favorite and away favorite columns (fill na with 0's)
df.loc[df.team_favorite_id == df.team_home, 'home_favorite'] = 1
df.loc[df.team_favorite_id == df.team_away, 'away_favorite'] = 1
df.home_favorite.fillna(0, inplace=True)
df.away_favorite.fillna(0, inplace=True)

# creating over / under column (fill na with 0's)
df.loc[((df.score_home + df.score_away) > df.over_under_line), 'over'] = 1
df.over.fillna(0, inplace=True)

# stadium neutral and schedule playoff as boolean
df['stadium_neutral'] = df.stadium_neutral.astype(int)
df['schedule_playoff'] = df.schedule_playoff.astype(int)

# change data type of date columns
df['schedule_date'] = pd.to_datetime(df['schedule_date'])
games_elo['date'] = pd.to_datetime(games_elo['date'])



In [0]:

    
# fixing some schedule_week column errors and converting column to integer data type
df.loc[(df.schedule_week == '18'), 'schedule_week'] = '17'
df.loc[(df.schedule_week == 'Wildcard') | (df.schedule_week == 'WildCard'), 'schedule_week'] = '18'
df.loc[(df.schedule_week == 'Division'), 'schedule_week'] = '19'
df.loc[(df.schedule_week == 'Conference'), 'schedule_week'] = '20'
df.loc[(df.schedule_week == 'Superbowl') | (df.schedule_week == 'SuperBowl'), 'schedule_week'] = '21'
df['schedule_week'] = df.schedule_week.astype(int)



In [0]:

    
# removing extra columns that aren't necessary for analysis
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite',
       'over_under_line', 'weather_temperature',
       'weather_wind_mph', 'score_home', 'score_away',
       'stadium_neutral', 'home_favorite', 'away_favorite',
       'over']]



In [0]:

    
# Cleaning games_elo and df to merge correctly
wsh_map = {'WSH' : 'WAS'}
games_elo.loc[games_elo.team1 == 'WSH', 'team1'] = 'WAS' 
games_elo.loc[games_elo.team2 == 'WSH', 'team2'] = 'WAS'

# fix dates
df.loc[(df.schedule_date == '2016-09-19') & (df.team_home == 'MIN'), 'schedule_date'] = datetime.datetime(2016, 9, 18)
df.loc[(df.schedule_date == '2017-01-22') & (df.schedule_week == 21), 'schedule_date'] = datetime.datetime(2017, 2, 5)
df.loc[(df.schedule_date == '1990-01-27') & (df.schedule_week == 21), 'schedule_date'] = datetime.datetime(1990, 1, 28)
df.loc[(df.schedule_date == '1990-01-13'), 'schedule_date'] = datetime.datetime(1990, 1, 14)
games_elo.loc[(games_elo.date == '2016-01-09'), 'date'] = datetime.datetime(2016, 1, 10)
games_elo.loc[(games_elo.date == '2016-01-08'), 'date'] = datetime.datetime(2016, 1, 9)
games_elo.loc[(games_elo.date == '2016-01-16'), 'date'] = datetime.datetime(2016, 1, 17)
games_elo.loc[(games_elo.date == '2016-01-15'), 'date'] = datetime.datetime(2016, 1, 16)



In [0]:

    
# merge games_elo with df
df = df.merge(games_elo, left_on=['schedule_date', 'team_home', 'team_away'], right_on=['date', 'team1', 'team2'], how='left')

# merge to fix neutral games where team_home and team_away are switched
games_elo2 = games_elo.rename(columns={'team1' : 'team2', 'team2' : 'team1', 'elo1' : 'elo2', 'elo2' : 'elo1'})
games_elo2['elo_prob1'] = 1 - games_elo2.elo_prob1
df = df.merge(games_elo2, left_on=['schedule_date', 'team_home', 'team_away'], right_on=['date', 'team1', 'team2'], how='left')



In [0]:

    
# separating merged columns into x and y cols
x_cols = ['date_x', 'season_x', 'neutral_x', 'playoff_x', 'team1_x', 'team2_x', 'elo1_x', 'elo2_x', 'elo_prob1_x', 'elo_prob2_x', 
          'qbelo_prob1_x', 'qbelo_prob2_x', 'score1_x', 'score2_x']
y_cols = ['date_y', 'season_y', 'neutral_y', 'playoff_y', 'team1_y', 'team2_y', 'elo1_y', 'elo2_y', 'elo_prob1_y', 'elo_prob2_y', 
          'qbelo_prob1_y', 'qbelo_prob2_y','score1_y', 'score2_y']

# filling null values for games_elo merged cols
for x, y in zip(x_cols, y_cols):
    df[x] = df[x].fillna(df[y]) 

# removing y_cols from dataframe    
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away',
       'stadium_neutral', 'home_favorite', 'away_favorite', 'over', 'neutral_x', 'playoff_x',
         'elo1_x', 'elo2_x', 'elo_prob1_x','elo_prob2_x', 'qbelo_prob1_x', 'qbelo_prob2_x' ]]

# remove _x ending from column names
df.columns = df.columns.str.replace('_x', '')



In [0]:

    
# creating result column df.loc[(df.score_home > df.score_away), 'result'

# HOME WIN
df['result'] = (df.score_home > df.score_away).astype(int)

# OVER
#df['result'] = (df.over).astype(int)

# HOME WIN AND ATS
#df['result'] = (df.home_favorite == 1) & (df.score_home > df.score_away) & ((df.score_away - df.score_home) > df.spread_favorite).astype(int)



In [0]:

    
# all column names
df.columns









    Out[0]:





Index(['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away',
       'stadium_neutral', 'home_favorite', 'away_favorite', 'over', 'neutral',
       'playoff', 'elo1', 'elo2', 'elo_prob1', 'elo_prob2', 'qbelo_prob1',
       'qbelo_prob2', 'result'],
      dtype='object')



In [0]:

    
# snapshot of data
df.tail(10)









    Out[0]:







  
    
      
      schedule_date
      schedule_season
      schedule_week
      team_home
      team_away
      team_favorite_id
      spread_favorite
      over_under_line
      weather_temperature
      weather_wind_mph
      score_home
      score_away
      stadium_neutral
      home_favorite
      away_favorite
      over
      neutral
      playoff
      elo1
      elo2
      elo_prob1
      elo_prob2
      qbelo_prob1
      qbelo_prob2
      result
    
  
  
    
      9886
      2019-09-22
      2019
      3
      GB
      DEN
      GB
      -7.5
      43.0
      0.0
      0.0
      27.0
      16.0
      0
      1.0
      0.0
      0.0
      0.0
      NaN
      1507.994649
      1419.884182
      0.707107
      0.292893
      0.743268
      0.256732
      1
    
    
      9887
      2019-09-22
      2019
      3
      KC
      BAL
      KC
      -5.5
      55.0
      0.0
      0.0
      33.0
      28.0
      0
      1.0
      0.0
      1.0
      0.0
      NaN
      1639.908760
      1604.360622
      0.640792
      0.359208
      0.690695
      0.309305
      1
    
    
      9888
      2019-09-22
      2019
      3
      LAC
      HOU
      LAC
      -3.0
      48.0
      0.0
      0.0
      20.0
      27.0
      0
      1.0
      0.0
      0.0
      0.0
      NaN
      1583.703049
      1513.605730
      0.685180
      0.314820
      0.659790
      0.340210
      0
    
    
      9889
      2019-09-22
      2019
      3
      MIN
      OAK
      MIN
      -9.0
      42.5
      0.0
      0.0
      34.0
      14.0
      0
      1.0
      0.0
      1.0
      0.0
      NaN
      1541.128678
      1408.111377
      0.757657
      0.242343
      0.737240
      0.262760
      1
    
    
      9890
      2019-09-22
      2019
      3
      NE
      NYJ
      NE
      -20.5
      45.5
      0.0
      0.0
      30.0
      14.0
      0
      1.0
      0.0
      0.0
      0.0
      NaN
      1677.301131
      1345.334919
      0.907637
      0.092363
      0.912006
      0.087994
      1
    
    
      9891
      2019-09-22
      2019
      3
      PHI
      DET
      PHI
      -5.5
      47.5
      0.0
      0.0
      24.0
      27.0
      0
      1.0
      0.0
      1.0
      0.0
      NaN
      1571.763505
      1504.718793
      0.681378
      0.318622
      0.698684
      0.301316
      0
    
    
      9892
      2019-09-22
      2019
      3
      SF
      PIT
      SF
      -6.5
      43.5
      0.0
      0.0
      24.0
      20.0
      0
      1.0
      0.0
      1.0
      0.0
      NaN
      1493.503009
      1538.911743
      0.528164
      0.471836
      0.786794
      0.213206
      1
    
    
      9893
      2019-09-22
      2019
      3
      SEA
      NO
      SEA
      -4.5
      44.5
      0.0
      0.0
      27.0
      33.0
      0
      1.0
      0.0
      1.0
      0.0
      NaN
      1581.220030
      1592.947548
      0.576070
      0.423930
      0.730921
      0.269079
      0
    
    
      9894
      2019-09-22
      2019
      3
      TB
      NYG
      TB
      -6.0
      48.0
      0.0
      0.0
      31.0
      32.0
      0
      1.0
      0.0
      1.0
      0.0
      NaN
      1439.762740
      1397.221631
      0.650004
      0.349996
      0.685660
      0.314340
      0
    
    
      9895
      2019-09-23
      2019
      3
      WAS
      CHI
      CHI
      -4.0
      41.5
      0.0
      0.0
      15.0
      31.0
      0
      0.0
      1.0
      1.0
      0.0
      NaN
      1414.227959
      1563.071253
      0.381629
      0.618371
      0.412243
      0.587757
      0



In [0]:

    
# null values by column
df.isnull().sum(axis=0)









    Out[0]:





schedule_date             0
schedule_season           0
schedule_week             0
team_home                 0
team_away                 0
team_favorite_id          0
spread_favorite           0
over_under_line           0
weather_temperature     245
weather_wind_mph        245
score_home                0
score_away                0
stadium_neutral           0
home_favorite             0
away_favorite             0
over                      0
neutral                   4
playoff                9538
elo1                      4
elo2                      4
elo_prob1                 4
elo_prob2                 4
qbelo_prob1               4
qbelo_prob2               4
result                    0
dtype: int64



In [0]:

    
# summary statistics
df.describe().transpose()









    Out[0]:







  
    
      
      count
      mean
      std
      min
      25%
      50%
      75%
      max
    
  
  
    
      schedule_season
      9896.0
      1999.583670
      11.427849
      1979.000000
      1990.000000
      2000.000000
      2009.000000
      2019.000000
    
    
      schedule_week
      9896.0
      9.251617
      5.204222
      1.000000
      5.000000
      9.000000
      14.000000
      21.000000
    
    
      spread_favorite
      9896.0
      -5.368230
      3.423312
      -26.500000
      -7.000000
      -4.500000
      -3.000000
      0.000000
    
    
      over_under_line
      9896.0
      41.870069
      4.671214
      28.000000
      38.000000
      41.500000
      45.000000
      63.500000
    
    
      weather_temperature
      9651.0
      59.437260
      15.979763
      -6.000000
      49.000000
      63.000000
      72.000000
      97.000000
    
    
      weather_wind_mph
      9651.0
      7.325251
      5.702686
      0.000000
      1.000000
      8.000000
      11.000000
      40.000000
    
    
      score_home
      9896.0
      22.643290
      10.393699
      0.000000
      16.000000
      22.000000
      30.000000
      62.000000
    
    
      score_away
      9896.0
      19.854082
      10.041910
      0.000000
      13.000000
      20.000000
      27.000000
      59.000000
    
    
      stadium_neutral
      9896.0
      0.007882
      0.088434
      0.000000
      0.000000
      0.000000
      0.000000
      1.000000
    
    
      home_favorite
      9896.0
      0.669867
      0.470285
      0.000000
      0.000000
      1.000000
      1.000000
      1.000000
    
    
      away_favorite
      9896.0
      0.315683
      0.464811
      0.000000
      0.000000
      0.000000
      1.000000
      1.000000
    
    
      over
      9896.0
      0.483630
      0.499757
      0.000000
      0.000000
      0.000000
      1.000000
      1.000000
    
    
      neutral
      9892.0
      0.007279
      0.085008
      0.000000
      0.000000
      0.000000
      0.000000
      1.000000
    
    
      elo1
      9892.0
      1507.540308
      98.507019
      1197.301000
      1438.801657
      1506.863000
      1575.361750
      1849.484000
    
    
      elo2
      9892.0
      1506.790805
      96.755981
      1201.561463
      1439.192750
      1507.567000
      1575.960250
      1825.961000
    
    
      elo_prob1
      9892.0
      0.582324
      0.167005
      0.070953
      0.465624
      0.592212
      0.708533
      0.964578
    
    
      elo_prob2
      9892.0
      0.417369
      0.166854
      0.035422
      0.291338
      0.407719
      0.534117
      0.929047
    
    
      qbelo_prob1
      9892.0
      0.575710
      0.170887
      0.059810
      0.454738
      0.586443
      0.706254
      0.967197
    
    
      qbelo_prob2
      9892.0
      0.424290
      0.170887
      0.032803
      0.293746
      0.413557
      0.545262
      0.940190
    
    
      result
      9896.0
      0.580234
      0.493545
      0.000000
      0.000000
      1.000000
      1.000000
      1.000000



In [0]:

    
# some percentages to take into consideration when betting
home_win = "{:.2f}".format((sum((df.result == 1) & (df.stadium_neutral == 0)) / len(df)) * 100)
away_win = "{:.2f}".format((sum((df.result == 0) & (df.stadium_neutral == 0)) / len(df)) * 100)
under_line = "{:.2f}".format((sum((df.score_home + df.score_away) < df.over_under_line) / len(df)) * 100)
over_line = "{:.2f}".format((sum((df.score_home + df.score_away) > df.over_under_line) / len(df)) * 100)

favored = "{:.2f}".format((sum(((df.home_favorite == 1) & (df.result == 1)) | ((df.away_favorite == 1) & (df.result == 0)))
                           / len(df)) * 100)

cover = "{:.2f}".format((sum(((df.home_favorite == 1) & ((df.score_away - df.score_home) < df.spread_favorite)) | 
                             ((df.away_favorite == 1) & ((df.score_home - df.score_away) < df.spread_favorite))) 
                         / len(df)) * 100)

ats = "{:.2f}".format((sum(((df.home_favorite == 1) & ((df.score_away - df.score_home) > df.spread_favorite)) | 
                           ((df.away_favorite == 1) & ((df.score_home - df.score_away) > df.spread_favorite))) 
                       / len(df)) * 100)



In [0]:

    
# print all percentages
print("Number of Games: " + str(len(df)))
print("Home Straight Up Win Percentage: " + home_win + "%")
print("Away Straight Up Win Percentage: " + away_win + "%")
print("Under Percentage: " + under_line + "%")
print("Over Percentage: " + over_line + "%")
print("Favored Win Percentage: " + favored + "%")
print("Cover The Spread Percentage: " + cover + "%")
print("Against The Spread Percentage: " + ats + "%")









    



Number of Games: 9896
Home Straight Up Win Percentage: 57.65%
Away Straight Up Win Percentage: 41.56%
Under Percentage: 49.76%
Over Percentage: 48.36%
Favored Win Percentage: 65.14%
Cover The Spread Percentage: 46.36%
Against The Spread Percentage: 49.42%



In [0]:

    
# creating 2 separate dataframes with the home teams / scores and the away teams / scores
score = df.groupby(['schedule_season', 'schedule_week', 'team_home']).mean()[['score_home', 'score_away']].reset_index()
aw_score = df.groupby(['schedule_season', 'schedule_week', 'team_away']).mean()[['score_home', 'score_away']].reset_index()

# create total pts column
score['point_diff'] = score.score_home - score.score_away
aw_score['point_diff'] = aw_score.score_away - aw_score.score_home

# append the two dataframes
score = score.append(aw_score, ignore_index=True, sort=True)

# fill null values
score.team_home.fillna(score.team_away, inplace=True)

# sort by season and week 
score.sort_values(['schedule_season', 'schedule_week'], ascending = [True, True], inplace=True)

# removing unneeded columns & changing column name 
score = score[['schedule_season', 'schedule_week', 'team_home', 'point_diff']]
score.rename(columns={'team_home' : 'team'}, inplace=True)



In [0]:

    
# dictionary of dataframes - separate dataframe for each team
tm_dict = {}
for key in score.team.unique():
    tm_dict[key] = score[score.team == key].reset_index(drop=True)



In [0]:

    
# dataframe to populate
pts_diff = pd.DataFrame()

# for loop to create a rolling average of the previous games for each season
for yr in score.schedule_season.unique():
    for tm in score.team.unique():
        data = tm_dict[tm].copy()
        data = data[data.schedule_season == yr]
        
        data.loc[:, 'avg_pts_diff'] = data.point_diff.shift().expanding().mean()
        
        pts_diff = pts_diff.append(data)



In [0]:

    
# merging to df and changing column names
df = df.merge(pts_diff[['schedule_season', 'schedule_week', 'team', 'avg_pts_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_home'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'avg_pts_diff' : 'hm_avg_pts_diff'}, inplace=True)

df = df.merge(pts_diff[['schedule_season', 'schedule_week', 'team', 'avg_pts_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_away'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'avg_pts_diff' : 'aw_avg_pts_diff'}, inplace=True)



In [0]:

    
# average point differential over entire season
total_season = pts_diff.groupby(['schedule_season', 'team']).mean()['point_diff'].reset_index()



In [0]:

    
# adding schedule week for merge and adding one to the season for predictions
total_season['schedule_week'] = 1
total_season['schedule_season'] += 1



In [0]:

    
# cleaning of columns
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away', 'stadium_neutral', 'home_favorite',
       'away_favorite', 'hm_avg_pts_diff','aw_avg_pts_diff', 'elo1', 'elo2', 'elo_prob1', 'elo_prob2', 'qbelo_prob1', 'qbelo_prob2', 'over', 'result']]



In [0]:

    
# merge to have previous seasons average point differential
df = df.merge(total_season[['schedule_season', 'schedule_week', 'team', 'point_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_home'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'point_diff' : 'hm_avg_diff'}, inplace=True)

df = df.merge(total_season[['schedule_season', 'schedule_week', 'team', 'point_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_away'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'point_diff' : 'aw_avg_diff'}, inplace=True)

# fill null values
df.hm_avg_pts_diff.fillna(df.hm_avg_diff, inplace=True)
df.aw_avg_pts_diff.fillna(df.aw_avg_diff, inplace=True)



In [0]:

    
# cleaning of columns
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away', 'stadium_neutral', 'home_favorite',
       'away_favorite', 'hm_avg_pts_diff','aw_avg_pts_diff', 'elo1', 'elo2', 'elo_prob1', 'elo_prob2', 'qbelo_prob1', 'qbelo_prob2', 'over', 'result']]



In [0]:

    
# removing all rows with null values
df = df.dropna(how='any',axis=0)



In [0]:

    
# initial features possible for model
X = df[['schedule_season', 'schedule_week', 'over_under_line', 'spread_favorite', 'weather_temperature', 'weather_wind_mph',
        'home_favorite', 'hm_avg_pts_diff','aw_avg_pts_diff', 'elo1', 'elo2', 'elo_prob1', 'elo_prob2', 'qbelo_prob1', 'qbelo_prob2']]

y = df['result']



In [0]:

    
# base model
base = LDA()

# choose 5 best features
rfe = RFE(base, 5)
rfe = rfe.fit(X, y)

# features
print(rfe.support_)
print(rfe.ranking_)









    



/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")






    



[False False False False False False  True False False False False  True
  True  True  True]
[ 9  4  8  2 10  7  1  3 11  6  5  1  1  1  1]






    



/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")



In [0]:

    
# best 5 features chosen by the RFE base model
final_x = df[['spread_favorite', 'home_favorite', 'hm_avg_pts_diff', 'elo_prob1', 'elo_prob2', 'qbelo_prob1', 'qbelo_prob2']]
final_x









    Out[0]:







  
    
      
      spread_favorite
      home_favorite
      hm_avg_pts_diff
      elo_prob1
      elo_prob2
      qbelo_prob1
      qbelo_prob2
    
  
  
    
      14
      -2.0
      1.0
      10.000000
      0.656962
      0.343038
      0.645891
      0.354109
    
    
      15
      0.0
      0.0
      -14.000000
      0.566285
      0.433715
      0.533151
      0.466849
    
    
      16
      -3.0
      0.0
      -2.000000
      0.502268
      0.497732
      0.474937
      0.525063
    
    
      17
      -6.0
      1.0
      3.000000
      0.572482
      0.427518
      0.567900
      0.432100
    
    
      18
      0.0
      0.0
      -15.000000
      0.587308
      0.412692
      0.493688
      0.506312
    
    
      19
      -3.0
      1.0
      -3.000000
      0.620610
      0.379390
      0.611216
      0.388784
    
    
      20
      0.0
      0.0
      14.000000
      0.492412
      0.507588
      0.480420
      0.519580
    
    
      21
      -7.0
      1.0
      2.000000
      0.744615
      0.255385
      0.734354
      0.265646
    
    
      22
      -8.0
      1.0
      -3.000000
      0.684211
      0.315789
      0.642013
      0.357987
    
    
      23
      -3.0
      0.0
      -6.000000
      0.515880
      0.484120
      0.514806
      0.485194
    
    
      24
      -6.0
      1.0
      3.000000
      0.733198
      0.266802
      0.736662
      0.263338
    
    
      25
      -3.0
      1.0
      17.000000
      0.605790
      0.394210
      0.585195
      0.414805
    
    
      26
      -13.0
      0.0
      -6.000000
      0.203429
      0.796571
      0.195525
      0.804475
    
    
      27
      -4.0
      1.0
      6.000000
      0.636106
      0.363894
      0.624739
      0.375261
    
    
      28
      -3.0
      0.0
      5.000000
      0.516870
      0.483130
      0.523518
      0.476482
    
    
      29
      -6.0
      0.0
      -18.500000
      0.424853
      0.575147
      0.397621
      0.602379
    
    
      30
      -7.0
      1.0
      3.000000
      0.757372
      0.242628
      0.767868
      0.232132
    
    
      31
      -9.0
      1.0
      4.500000
      0.782305
      0.217695
      0.817339
      0.182661
    
    
      32
      -2.0
      1.0
      3.000000
      0.642566
      0.357434
      0.641037
      0.358963
    
    
      33
      -5.0
      1.0
      -14.500000
      0.768188
      0.231812
      0.737951
      0.262049
    
    
      34
      -13.0
      1.0
      -1.500000
      0.827084
      0.172916
      0.812240
      0.187760
    
    
      35
      -7.0
      0.0
      -6.500000
      0.439397
      0.560603
      0.432023
      0.567977
    
    
      36
      -3.0
      1.0
      -7.500000
      0.463061
      0.536939
      0.455162
      0.544838
    
    
      37
      -6.0
      1.0
      -28.000000
      0.601852
      0.398148
      0.659437
      0.340563
    
    
      38
      -9.0
      1.0
      18.500000
      0.780578
      0.219422
      0.780983
      0.219017
    
    
      39
      -3.0
      1.0
      -13.000000
      0.453306
      0.546694
      0.430703
      0.569297
    
    
      40
      -6.0
      0.0
      6.000000
      0.316497
      0.683503
      0.305767
      0.694233
    
    
      41
      -6.0
      1.0
      0.500000
      0.695947
      0.304053
      0.703896
      0.296104
    
    
      42
      -3.0
      1.0
      5.666667
      0.549427
      0.450573
      0.551861
      0.448139
    
    
      43
      -3.0
      0.0
      -14.333333
      0.390170
      0.609830
      0.316901
      0.683099
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      9990
      -12.5
      0.0
      49.000000
      0.832054
      0.167946
      0.805902
      0.194098
    
    
      9991
      0.0
      1.0
      -1.000000
      0.546993
      0.453007
      0.466439
      0.533561
    
    
      9992
      -3.0
      0.0
      -8.000000
      0.413819
      0.586181
      0.459613
      0.540387
    
    
      9993
      -1.0
      0.0
      0.000000
      0.434138
      0.565862
      0.409146
      0.590854
    
    
      9994
      -3.0
      1.0
      7.000000
      0.493512
      0.506488
      0.557226
      0.442774
    
    
      9995
      -3.0
      1.0
      30.000000
      0.601295
      0.398705
      0.676157
      0.323843
    
    
      9996
      -7.5
      1.0
      -2.000000
      0.690490
      0.309510
      0.804020
      0.195980
    
    
      9997
      -7.0
      0.0
      8.000000
      0.319582
      0.680418
      0.306070
      0.693930
    
    
      9998
      -2.0
      1.0
      3.000000
      0.584798
      0.415202
      0.556322
      0.443678
    
    
      9999
      -18.0
      0.0
      -49.000000
      0.230725
      0.769275
      0.186361
      0.813639
    
    
      10000
      -1.5
      0.0
      -18.000000
      0.553512
      0.446488
      0.482772
      0.517228
    
    
      10001
      -4.0
      1.0
      -30.000000
      0.569004
      0.430996
      0.584538
      0.415462
    
    
      10002
      -6.0
      0.0
      -5.000000
      0.409885
      0.590115
      0.377841
      0.622159
    
    
      10003
      -6.5
      0.0
      -1.000000
      0.531159
      0.468841
      0.500309
      0.499691
    
    
      10004
      -2.0
      0.0
      -7.500000
      0.437474
      0.562526
      0.331980
      0.668020
    
    
      10005
      -2.5
      1.0
      -3.000000
      0.455128
      0.544872
      0.611187
      0.388813
    
    
      10006
      -1.5
      1.0
      -2.000000
      0.651773
      0.348227
      0.538457
      0.461543
    
    
      10007
      -6.0
      1.0
      7.500000
      0.716570
      0.283430
      0.737074
      0.262926
    
    
      10008
      -4.0
      0.0
      -5.000000
      0.338904
      0.661096
      0.390260
      0.609740
    
    
      10009
      -22.0
      1.0
      14.000000
      0.829257
      0.170743
      0.910799
      0.089201
    
    
      10010
      -7.5
      1.0
      6.000000
      0.707107
      0.292893
      0.743268
      0.256732
    
    
      10011
      -5.5
      1.0
      16.000000
      0.640792
      0.359208
      0.690695
      0.309305
    
    
      10012
      -3.0
      1.0
      1.500000
      0.685180
      0.314820
      0.659790
      0.340210
    
    
      10013
      -9.0
      1.0
      5.500000
      0.757657
      0.242343
      0.737240
      0.262760
    
    
      10014
      -20.5
      1.0
      36.500000
      0.907637
      0.092363
      0.912006
      0.087994
    
    
      10015
      -5.5
      1.0
      0.500000
      0.681378
      0.318622
      0.698684
      0.301316
    
    
      10016
      -6.5
      1.0
      19.000000
      0.528164
      0.471836
      0.786794
      0.213206
    
    
      10017
      -4.5
      1.0
      1.500000
      0.576070
      0.423930
      0.730921
      0.269079
    
    
      10018
      -6.0
      1.0
      -4.000000
      0.650004
      0.349996
      0.685660
      0.314340
    
    
      10019
      -4.0
      0.0
      -7.500000
      0.381629
      0.618371
      0.412243
      0.587757
    
  

9745 rows × 7 columns



In [0]:

    
# prepare models
models = []

models.append(('LRG', LogisticRegression(solver='liblinear')))
models.append(('KNB', KNeighborsClassifier()))
models.append(('GNB', GaussianNB()))
models.append(('XGB', xgb.XGBClassifier(random_state=0)))
models.append(('RFC', RandomForestClassifier(random_state=0, n_estimators=100)))
models.append(('DTC', DecisionTreeClassifier(random_state=0, criterion='entropy', max_depth=5)))

# evaluate each model by average and standard deviations of roc auc 
results = []
names = []

for name, m in models:
    kfold = model_selection.KFold(n_splits=5, random_state=0)
    cv_results = model_selection.cross_val_score(m, final_x, y, cv=kfold, scoring = 'roc_auc')
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)









    



LRG: 0.690277 (0.015071)
KNB: 0.628918 (0.005377)
GNB: 0.689834 (0.013784)
XGB: 0.692945 (0.013838)
RFC: 0.654952 (0.011307)
DTC: 0.687032 (0.010799)



In [0]:

    
# training and testing data (2017 and 2018)
train = df.copy()
test = df.copy()
train = train.loc[train['schedule_season'] < 2019]
test = test.loc[test['schedule_season'] > 2010]
X_train = train[['over_under_line', 'spread_favorite', 'home_favorite', 'hm_avg_pts_diff', 'elo_prob1', 'qbelo_prob1', 'qbelo_prob2']]
y_train = train['result']
X_test = test[['over_under_line', 'spread_favorite', 'home_favorite', 'hm_avg_pts_diff', 'elo_prob1', 'qbelo_prob1', 'qbelo_prob2',]]
y_test = test['result']

train.head()









    Out[0]:







  
    
      
      schedule_date
      schedule_season
      schedule_week
      team_home
      team_away
      team_favorite_id
      spread_favorite
      over_under_line
      weather_temperature
      weather_wind_mph
      score_home
      score_away
      stadium_neutral
      home_favorite
      away_favorite
      hm_avg_pts_diff
      aw_avg_pts_diff
      elo1
      elo2
      elo_prob1
      elo_prob2
      qbelo_prob1
      qbelo_prob2
      over
      result
    
  
  
    
      14
      1979-09-06
      1979
      2
      DEN
      LAR
      DEN
      -2.0
      31.5
      67.0
      8.0
      9.0
      13.0
      0
      1.0
      0.0
      10.0
      -7.0
      1592.275
      1544.396
      0.656962
      0.343038
      0.645891
      0.354109
      0.0
      0
    
    
      15
      1979-09-09
      1979
      2
      IND
      TB
      PICK
      0.0
      33.0
      64.0
      8.0
      26.0
      29.0
      0
      0.0
      0.0
      -14.0
      15.0
      1397.713
      1416.381
      0.566285
      0.433715
      0.533151
      0.466849
      1.0
      0
    
    
      16
      1979-09-09
      1979
      2
      BUF
      CIN
      CIN
      -3.0
      34.0
      54.0
      7.0
      51.0
      24.0
      0
      0.0
      1.0
      -2.0
      -10.0
      1414.802
      1478.226
      0.502268
      0.497732
      0.474937
      0.525063
      1.0
      1
    
    
      17
      1979-09-09
      1979
      2
      CHI
      MIN
      CHI
      -6.0
      31.5
      60.0
      11.0
      26.0
      7.0
      0
      1.0
      0.0
      3.0
      6.0
      1495.817
      1510.094
      0.572482
      0.427518
      0.567900
      0.432100
      1.0
      1
    
    
      18
      1979-09-09
      1979
      2
      DET
      WAS
      PICK
      0.0
      35.5
      72.0
      0.0
      24.0
      27.0
      0
      0.0
      0.0
      -15.0
      -2.0
      1455.863
      1459.567
      0.587308
      0.412692
      0.493688
      0.506312
      1.0
      0



In [0]:

    
# calibrate probabilities and fit model to training data
boost = xgb.XGBClassifier()
dtc = DecisionTreeClassifier(max_depth=5, criterion='entropy')
lrg = LogisticRegression(solver='liblinear')
vote = VotingClassifier(estimators=[('boost', boost), ('dtc', dtc), ('lrg', lrg)], voting='soft')

model = CCV(vote, method='isotonic', cv=3)
model.fit(X_train, y_train)









    Out[0]:





CalibratedClassifierCV(base_estimator=VotingClassifier(estimators=[('boost',
                                                                    XGBClassifier(base_score=0.5,
                                                                                  booster='gbtree',
                                                                                  colsample_bylevel=1,
                                                                                  colsample_bynode=1,
                                                                                  colsample_bytree=1,
                                                                                  gamma=0,
                                                                                  learning_rate=0.1,
                                                                                  max_delta_step=0,
                                                                                  max_depth=3,
                                                                                  min_child_weight=1,
                                                                                  missing=None,
                                                                                  n_estimators=100,
                                                                                  n_jobs=1,
                                                                                  nthread=None,
                                                                                  objective='binary:logistic',
                                                                                  random_state=0,
                                                                                  r...
                                                                                           splitter='best')),
                                                                   ('lrg',
                                                                    LogisticRegression(C=1.0,
                                                                                       class_weight=None,
                                                                                       dual=False,
                                                                                       fit_intercept=True,
                                                                                       intercept_scaling=1,
                                                                                       l1_ratio=None,
                                                                                       max_iter=100,
                                                                                       multi_class='warn',
                                                                                       n_jobs=None,
                                                                                       penalty='l2',
                                                                                       random_state=None,
                                                                                       solver='liblinear',
                                                                                       tol=0.0001,
                                                                                       verbose=0,
                                                                                       warm_start=False))],
                                                       flatten_transform=True,
                                                       n_jobs=None,
                                                       voting='soft',
                                                       weights=None),
                       cv=3, method='isotonic')



In [0]:

    
# predict probabilities
predicted = model.predict_proba(X_test)[:,1]



In [0]:

    
# ROC AUC Score higher is better while Brier Score the lower the better
print("Metrics" + "\t\t" + "My Model" + "\t" + "Elo Results")
print("ROC AUC Score: " +  "\t" + "{:.4f}".format(roc_auc_score(y_test, predicted)) + "\t\t" + "{:.4f}".format(roc_auc_score(test.result, test.elo_prob1)))
print("Brier Score: " + "\t" + "{:.4f}".format(brier_score_loss(y_test, predicted)) + "\t\t" + "{:.4f}".format(brier_score_loss(test.result, test.elo_prob1)))









    



Metrics		My Model	Elo Results
ROC AUC Score: 	0.7363		0.6924
Brier Score: 	0.2039		0.2175



In [0]:

    
# creating a column with the models probabilities to analyze vs elo fivethirtyeight
test.loc[:,'hm_prob'] = predicted
test = test[['schedule_season', 'schedule_week', 'team_home', 'team_away', 'elo_prob1', 'hm_prob', 'result']]



In [0]:

    
# calulate bets won (only make a bet when probability is greater than / equal to 60% or less than / equal to 40%)
test['my_bet_won'] = (((test.hm_prob >= 0.60) & (test.result == 1)) | ((test.hm_prob <= 0.40) & (test.result == 0))).astype(int)
test['elo_bet_won'] = (((test.elo_prob1 >= 0.60) & (test.result == 1)) | ((test.elo_prob1 <= 0.40) & (test.result == 0))).astype(int)

# calulate bets lost (only make a bet when probability is greater than / equal to 60% or less than / equal to 40%)
test['my_bet_lost'] = (((test.hm_prob >= 0.60) & (test.result == 0)) | ((test.hm_prob <= 0.40) & (test.result == 1))).astype(int)
test['elo_bet_lost'] = (((test.elo_prob1 >= 0.60) & (test.result == 0)) | ((test.elo_prob1 <= 0.40) & (test.result == 1))).astype(int)



In [0]:

    
# printing some quick overall results for my model
print("My Model Win Percentage: " + "{:.4f}".format(test.my_bet_won.sum() / (test.my_bet_lost.sum() + test.my_bet_won.sum())))
print("Total Number of Bets Won: " + str(test.my_bet_won.sum()))
print("Total Number of Bets Made: " + str((test.my_bet_lost.sum() + test.my_bet_won.sum())))
print("Possible Games: " + str(len(test)))









    



My Model Win Percentage: 0.7449
Total Number of Bets Won: 1057
Total Number of Bets Made: 1419
Possible Games: 2151



In [0]:

    
# printing some quick overall results for fivethirtyeight's ELO model
print("ELO Model Win Percentage: " + "{:.4f}".format(test.elo_bet_won.sum()/(test.elo_bet_lost.sum() + test.elo_bet_won.sum())))
print("Total Number of Bets Won: " + str(test.elo_bet_won.sum()))
print("Total Number of Bets Made: " + str((test.elo_bet_lost.sum() + test.elo_bet_won.sum())))
print("Possible Games: " + str(len(test)))









    



ELO Model Win Percentage: 0.7015
Total Number of Bets Won: 973
Total Number of Bets Made: 1387
Possible Games: 2151



In [0]:

    
# creating week by week results
results_df = test.groupby(['schedule_season', 'schedule_week']).agg({'team_home' : 'count', 'my_bet_won' : 'sum', 
'elo_bet_won' : 'sum', 'my_bet_lost' : 'sum', 'elo_bet_lost' : 'sum'}).reset_index().rename(columns=
                                                                                            {'team_home' : 'total_games'})

# counting total bets for my model and the ELO model (prob >= 60% or prob <= 40%)
results_df['total_bets'] = results_df.my_bet_won + results_df.my_bet_lost
results_df['elo_total_bets'] = results_df.elo_bet_won + results_df.elo_bet_lost

# creating accuracy columns based on bets made not on total games
results_df['bet_accuracy'] = round((results_df.my_bet_won / results_df.total_bets) * 100, 2)
results_df['elo_bet_accuracy'] = round((results_df.elo_bet_won / results_df.elo_total_bets) * 100, 2)
results_df = results_df[['schedule_season', 'schedule_week', 'bet_accuracy', 'elo_bet_accuracy',
                         'total_bets', 'elo_total_bets', 'total_games']]



In [0]:

    
results_df.tail()









    Out[0]:







  
    
      
      schedule_season
      schedule_week
      bet_accuracy
      elo_bet_accuracy
      total_bets
      elo_total_bets
      total_games
    
  
  
    
      160
      2018
      20
      0.00
      0.00
      2
      2
      2
    
    
      161
      2018
      21
      NaN
      NaN
      0
      0
      1
    
    
      162
      2019
      1
      83.33
      81.82
      12
      11
      16
    
    
      163
      2019
      2
      83.33
      66.67
      6
      6
      16
    
    
      164
      2019
      3
      69.23
      75.00
      13
      12
      16



In [0]:

    
model.save("nflmodel1.h5")









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-42-84f49ef6abd9> in <module>()
----> 1 model.save("nflmodel1.h5")

AttributeError: 'CalibratedClassifierCV' object has no attribute 'save'

	schedule_date	schedule_season	schedule_week	team_home	team_away	team_favorite_id	spread_favorite	over_under_line	score_home	score_away	home_favorite	away_favorite	over	playoff	elo1	elo2	elo_prob1	elo_prob2	qbelo_prob1	qbelo_prob2	result
9886	2019-09-22	2019	3	GB	DEN	GB	-7.5	43.0	27.0	16.0	1.0	0.0	0.0	NaN	1507.994649	1419.884182	0.707107	0.292893	0.743268	0.256732	1
9887	2019-09-22	2019	3	KC	BAL	KC	-5.5	55.0	33.0	28.0	1.0	0.0	1.0	NaN	1639.908760	1604.360622	0.640792	0.359208	0.690695	0.309305	1
9888	2019-09-22	2019	3	LAC	HOU	LAC	-3.0	48.0	20.0	27.0	1.0	0.0	0.0	NaN	1583.703049	1513.605730	0.685180	0.314820	0.659790	0.340210	0
9889	2019-09-22	2019	3	MIN	OAK	MIN	-9.0	42.5	34.0	14.0	1.0	0.0	1.0	NaN	1541.128678	1408.111377	0.757657	0.242343	0.737240	0.262760	1
9890	2019-09-22	2019	3	NE	NYJ	NE	-20.5	45.5	30.0	14.0	1.0	0.0	0.0	NaN	1677.301131	1345.334919	0.907637	0.092363	0.912006	0.087994	1
9891	2019-09-22	2019	3	PHI	DET	PHI	-5.5	47.5	24.0	27.0	1.0	0.0	1.0	NaN	1571.763505	1504.718793	0.681378	0.318622	0.698684	0.301316	0
9892	2019-09-22	2019	3	SF	PIT	SF	-6.5	43.5	24.0	20.0	1.0	0.0	1.0	NaN	1493.503009	1538.911743	0.528164	0.471836	0.786794	0.213206	1
9893	2019-09-22	2019	3	SEA	NO	SEA	-4.5	44.5	27.0	33.0	1.0	0.0	1.0	NaN	1581.220030	1592.947548	0.576070	0.423930	0.730921	0.269079	0
9894	2019-09-22	2019	3	TB	NYG	TB	-6.0	48.0	31.0	32.0	1.0	0.0	1.0	NaN	1439.762740	1397.221631	0.650004	0.349996	0.685660	0.314340	0
9895	2019-09-23	2019	3	WAS	CHI	CHI	-4.0	41.5	15.0	31.0	0.0	1.0	1.0	NaN	1414.227959	1563.071253	0.381629	0.618371	0.412243	0.587757	0

	count	mean	std	min	25%	50%	75%	max
schedule_season	9896.0	1999.583670	11.427849	1979.000000	1990.000000	2000.000000	2009.000000	2019.000000
schedule_week	9896.0	9.251617	5.204222	1.000000	5.000000	9.000000	14.000000	21.000000
spread_favorite	9896.0	-5.368230	3.423312	-26.500000	-7.000000	-4.500000	-3.000000	0.000000
over_under_line	9896.0	41.870069	4.671214	28.000000	38.000000	41.500000	45.000000	63.500000
weather_temperature	9651.0	59.437260	15.979763	-6.000000	49.000000	63.000000	72.000000	97.000000
weather_wind_mph	9651.0	7.325251	5.702686	0.000000	1.000000	8.000000	11.000000	40.000000
score_home	9896.0	22.643290	10.393699	0.000000	16.000000	22.000000	30.000000	62.000000
score_away	9896.0	19.854082	10.041910	0.000000	13.000000	20.000000	27.000000	59.000000
stadium_neutral	9896.0	0.007882	0.088434	0.000000	0.000000	0.000000	0.000000	1.000000
home_favorite	9896.0	0.669867	0.470285	0.000000	0.000000	1.000000	1.000000	1.000000
away_favorite	9896.0	0.315683	0.464811	0.000000	0.000000	0.000000	1.000000	1.000000
over	9896.0	0.483630	0.499757	0.000000	0.000000	0.000000	1.000000	1.000000
neutral	9892.0	0.007279	0.085008	0.000000	0.000000	0.000000	0.000000	1.000000
elo1	9892.0	1507.540308	98.507019	1197.301000	1438.801657	1506.863000	1575.361750	1849.484000
elo2	9892.0	1506.790805	96.755981	1201.561463	1439.192750	1507.567000	1575.960250	1825.961000
elo_prob1	9892.0	0.582324	0.167005	0.070953	0.465624	0.592212	0.708533	0.964578
elo_prob2	9892.0	0.417369	0.166854	0.035422	0.291338	0.407719	0.534117	0.929047
qbelo_prob1	9892.0	0.575710	0.170887	0.059810	0.454738	0.586443	0.706254	0.967197
qbelo_prob2	9892.0	0.424290	0.170887	0.032803	0.293746	0.413557	0.545262	0.940190
result	9896.0	0.580234	0.493545	0.000000	0.000000	1.000000	1.000000	1.000000

	spread_favorite	home_favorite	hm_avg_pts_diff	elo_prob1	elo_prob2	qbelo_prob1	qbelo_prob2
14	-2.0	1.0	10.000000	0.656962	0.343038	0.645891	0.354109
15	0.0	0.0	-14.000000	0.566285	0.433715	0.533151	0.466849
16	-3.0	0.0	-2.000000	0.502268	0.497732	0.474937	0.525063
17	-6.0	1.0	3.000000	0.572482	0.427518	0.567900	0.432100
18	0.0	0.0	-15.000000	0.587308	0.412692	0.493688	0.506312
19	-3.0	1.0	-3.000000	0.620610	0.379390	0.611216	0.388784
20	0.0	0.0	14.000000	0.492412	0.507588	0.480420	0.519580
21	-7.0	1.0	2.000000	0.744615	0.255385	0.734354	0.265646
22	-8.0	1.0	-3.000000	0.684211	0.315789	0.642013	0.357987
23	-3.0	0.0	-6.000000	0.515880	0.484120	0.514806	0.485194
24	-6.0	1.0	3.000000	0.733198	0.266802	0.736662	0.263338
25	-3.0	1.0	17.000000	0.605790	0.394210	0.585195	0.414805
26	-13.0	0.0	-6.000000	0.203429	0.796571	0.195525	0.804475
27	-4.0	1.0	6.000000	0.636106	0.363894	0.624739	0.375261
28	-3.0	0.0	5.000000	0.516870	0.483130	0.523518	0.476482
29	-6.0	0.0	-18.500000	0.424853	0.575147	0.397621	0.602379
30	-7.0	1.0	3.000000	0.757372	0.242628	0.767868	0.232132
31	-9.0	1.0	4.500000	0.782305	0.217695	0.817339	0.182661
32	-2.0	1.0	3.000000	0.642566	0.357434	0.641037	0.358963
33	-5.0	1.0	-14.500000	0.768188	0.231812	0.737951	0.262049
34	-13.0	1.0	-1.500000	0.827084	0.172916	0.812240	0.187760
35	-7.0	0.0	-6.500000	0.439397	0.560603	0.432023	0.567977
36	-3.0	1.0	-7.500000	0.463061	0.536939	0.455162	0.544838
37	-6.0	1.0	-28.000000	0.601852	0.398148	0.659437	0.340563
38	-9.0	1.0	18.500000	0.780578	0.219422	0.780983	0.219017
39	-3.0	1.0	-13.000000	0.453306	0.546694	0.430703	0.569297
40	-6.0	0.0	6.000000	0.316497	0.683503	0.305767	0.694233
41	-6.0	1.0	0.500000	0.695947	0.304053	0.703896	0.296104
42	-3.0	1.0	5.666667	0.549427	0.450573	0.551861	0.448139
43	-3.0	0.0	-14.333333	0.390170	0.609830	0.316901	0.683099
...	...	...	...	...	...	...	...
9990	-12.5	0.0	49.000000	0.832054	0.167946	0.805902	0.194098
9991	0.0	1.0	-1.000000	0.546993	0.453007	0.466439	0.533561
9992	-3.0	0.0	-8.000000	0.413819	0.586181	0.459613	0.540387
9993	-1.0	0.0	0.000000	0.434138	0.565862	0.409146	0.590854
9994	-3.0	1.0	7.000000	0.493512	0.506488	0.557226	0.442774
9995	-3.0	1.0	30.000000	0.601295	0.398705	0.676157	0.323843
9996	-7.5	1.0	-2.000000	0.690490	0.309510	0.804020	0.195980
9997	-7.0	0.0	8.000000	0.319582	0.680418	0.306070	0.693930
9998	-2.0	1.0	3.000000	0.584798	0.415202	0.556322	0.443678
9999	-18.0	0.0	-49.000000	0.230725	0.769275	0.186361	0.813639
10000	-1.5	0.0	-18.000000	0.553512	0.446488	0.482772	0.517228
10001	-4.0	1.0	-30.000000	0.569004	0.430996	0.584538	0.415462
10002	-6.0	0.0	-5.000000	0.409885	0.590115	0.377841	0.622159
10003	-6.5	0.0	-1.000000	0.531159	0.468841	0.500309	0.499691
10004	-2.0	0.0	-7.500000	0.437474	0.562526	0.331980	0.668020
10005	-2.5	1.0	-3.000000	0.455128	0.544872	0.611187	0.388813
10006	-1.5	1.0	-2.000000	0.651773	0.348227	0.538457	0.461543
10007	-6.0	1.0	7.500000	0.716570	0.283430	0.737074	0.262926
10008	-4.0	0.0	-5.000000	0.338904	0.661096	0.390260	0.609740
10009	-22.0	1.0	14.000000	0.829257	0.170743	0.910799	0.089201
10010	-7.5	1.0	6.000000	0.707107	0.292893	0.743268	0.256732
10011	-5.5	1.0	16.000000	0.640792	0.359208	0.690695	0.309305
10012	-3.0	1.0	1.500000	0.685180	0.314820	0.659790	0.340210
10013	-9.0	1.0	5.500000	0.757657	0.242343	0.737240	0.262760
10014	-20.5	1.0	36.500000	0.907637	0.092363	0.912006	0.087994
10015	-5.5	1.0	0.500000	0.681378	0.318622	0.698684	0.301316
10016	-6.5	1.0	19.000000	0.528164	0.471836	0.786794	0.213206
10017	-4.5	1.0	1.500000	0.576070	0.423930	0.730921	0.269079
10018	-6.0	1.0	-4.000000	0.650004	0.349996	0.685660	0.314340
10019	-4.0	0.0	-7.500000	0.381629	0.618371	0.412243	0.587757

	schedule_date	schedule_season	schedule_week	team_home	team_away	team_favorite_id	spread_favorite	over_under_line	weather_temperature	weather_wind_mph	score_home	score_away	home_favorite	away_favorite	hm_avg_pts_diff	aw_avg_pts_diff	elo1	elo2	elo_prob1	elo_prob2	qbelo_prob1	qbelo_prob2	over	result
14	1979-09-06	1979	2	DEN	LAR	DEN	-2.0	31.5	67.0	8.0	9.0	13.0	1.0	0.0	10.0	-7.0	1592.275	1544.396	0.656962	0.343038	0.645891	0.354109	0.0	0
15	1979-09-09	1979	2	IND	TB	PICK	0.0	33.0	64.0	8.0	26.0	29.0	0.0	0.0	-14.0	15.0	1397.713	1416.381	0.566285	0.433715	0.533151	0.466849	1.0	0
16	1979-09-09	1979	2	BUF	CIN	CIN	-3.0	34.0	54.0	7.0	51.0	24.0	0.0	1.0	-2.0	-10.0	1414.802	1478.226	0.502268	0.497732	0.474937	0.525063	1.0	1
17	1979-09-09	1979	2	CHI	MIN	CHI	-6.0	31.5	60.0	11.0	26.0	7.0	1.0	0.0	3.0	6.0	1495.817	1510.094	0.572482	0.427518	0.567900	0.432100	1.0	1
18	1979-09-09	1979	2	DET	WAS	PICK	0.0	35.5	72.0	0.0	24.0	27.0	0.0	0.0	-15.0	-2.0	1455.863	1459.567	0.587308	0.412692	0.493688	0.506312	1.0	0

	schedule_season	schedule_week	bet_accuracy	elo_bet_accuracy	total_bets	elo_total_bets	total_games
160	2018	20	0.00	0.00	2	2	2
161	2018	21	NaN	NaN	0	0	1
162	2019	1	83.33	81.82	12	11	16
163	2019	2	83.33	66.67	6	6	16
164	2019	3	69.23	75.00	13	12	16