In [2]:
#Import Packages
import pandas as pd
import numpy as np
from scipy.stats import norm
from Schedule.Schedule import Schedule
from Schedule.Stats import Stats
from Regression.ELO import ELO
from Regression.Game_Scores_v2 import Game_Scores
%matplotlib inline
In [3]:
# Build one Schedule object per season.
# NOTE(review): b_dt is presumably the season's begin date (month/day/year
# string) -- confirm against the project-local Schedule class.
sched_2014, sched_2015 = (
    Schedule(b_dt='10/1/2014'),
    Schedule(b_dt='10/1/2015'),
)
In [4]:
#Add four factors
# Calls add_four_factors() on each season's Schedule object.
# NOTE(review): presumably this augments the schedule's game data with the
# "four factors" statistics -- confirm in the project-local Schedule class.
# The second call is the cell's last expression, so whatever it returns is
# what the notebook displays as this cell's output.
sched_2014.add_four_factors()
sched_2015.add_four_factors()
Out[4]:
In [5]:
# Create last n statistics
# Stack both seasons' games into one frame with a fresh 0..n-1 index.
# pd.concat(..., ignore_index=True) replaces DataFrame.append(...).reset_index(drop=True):
# DataFrame.append was removed in pandas 2.0, while pd.concat works on old and
# new pandas alike and yields the same rows in the same order.
games = pd.concat(
    [sched_2014.get_games(), sched_2015.get_games()],
    ignore_index=True,
)
# Stats is project-local; the positional arguments are passed through unchanged.
# NOTE(review): the strings look like an aggregation mode, the date column, the
# home/away team columns, the target column, and extra columns to carry along
# (['Game_ID']) -- confirm against Stats.__init__.
stats = Stats(games, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff', ['Game_ID'])
In [6]:
# Compute the last-5-games statistics, persist them to 'stats_5.csv', and
# reload them from disk. After this cell, stats_5 is the frame as parsed by
# pd.read_csv (column dtypes are whatever read_csv infers from the file),
# not the in-memory result of get_lastn_stats.
stats.get_lastn_stats(5).to_csv('stats_5.csv', index=False)
stats_5 = pd.read_csv('stats_5.csv')
In [7]:
# Drop games with insufficient history: keep only rows whose 'H_5_games' and
# 'A_5_games' values both equal 5, printing the row count before and after
# each filter so the attrition is visible.
# NOTE(review): these columns presumably count how many prior games fed each
# team's last-5 window -- confirm against Stats.get_lastn_stats.
print(len(stats_5))
home_window_full = stats_5['H_5_games'] == 5
stats_5 = stats_5[home_window_full]
print(len(stats_5))
away_window_full = stats_5['A_5_games'] == 5
stats_5 = stats_5[away_window_full]
print(len(stats_5))
In [8]:
# Build ELO ratings from the combined games frame.
# ELO is project-local; the column names are passed positionally in the
# order date, home team, away team, home points, away points.
elo = ELO(
    games,
    'GAME_DATE',
    'Home Team',
    'Away Team',
    'H_PTS',
    'A_PTS',
)
# NOTE(review): 1500 matches the fallback rating used when joining ELO back to
# games, so it is presumably the starting rating; the meaning of 20, 100 and
# '538' is defined by ELO.create_elo -- confirm there.
elo_data = elo.create_elo(
    1500,
    20,
    100,
    '538',
)
In [9]:
# Joins ELO data back to games
# For every game, take each team's rating from the last elo_data row whose
# index is strictly before the game date; if no such row exists, both teams
# fall back to DEFAULT_ELO.
# NOTE(review): assumes elo_data is indexed by date in ascending order with one
# column per team name -- confirm against ELO.create_elo.
#
# Changes from the original loop:
#   * DataFrame.set_value (removed in pandas 1.0) is gone; ratings are
#     collected in lists and assigned as whole columns once, which also makes
#     the cell safe to re-run.
#   * `.tail(1)[team][0]` relied on positional fallback for `[0]` on a
#     label-indexed Series; `.iloc[-1]` selects the same last element explicitly.
#   * The "no prior ratings" check is evaluated once per game instead of twice.
DEFAULT_ELO = 1500
home_elos = []
away_elos = []
for _, game in games.iterrows():
    prior_elo = elo_data[elo_data.index < game['GAME_DATE']]
    if len(prior_elo) == 0:
        home_elos.append(DEFAULT_ELO)
        away_elos.append(DEFAULT_ELO)
    else:
        home_elos.append(prior_elo[game['Home Team']].iloc[-1])
        away_elos.append(prior_elo[game['Away Team']].iloc[-1])
# iterrows() walks the frame in row order, so the lists line up with the rows.
games['H_ELO'] = home_elos
games['A_ELO'] = away_elos
In [10]:
#Filter out early games with little ELO info
# Sort chronologically, renumber rows 0..n-1, and drop the first 100 games.
# `.ix` was removed in pandas 1.0; because the index has just been reset, the
# positional slice `.iloc[100:]` selects exactly the rows `.ix[100:, :]` did
# (the surviving rows keep their labels 100..n-1).
# Caution: this cell reassigns `games`, so re-running it drops another 100 rows.
print(len(games))
games = games.sort_values(by='GAME_DATE').reset_index(drop=True).iloc[100:]
print(len(games))
In [14]:
#Merges Last 5 data and ELO data
# pd.merge(..., on='Game_ID') needs the key column in BOTH frames. The original
# selected only ['H_ELO', 'A_ELO'] from games, so the merge raised KeyError;
# 'Game_ID' is now included in the selection.
# NOTE(review): stats_5 was reloaded through pd.read_csv, so its 'Game_ID'
# dtype is whatever read_csv inferred -- confirm it matches games['Game_ID'],
# otherwise the join will not match rows.
data = pd.merge(games[['Game_ID', 'H_ELO', 'A_ELO']], stats_5, on='Game_ID')
In [16]:
# Inspection cell: the bare expression displays the column labels of `games`.
games.columns
Out[16]:
In [ ]:
#Create ELO scores
# Rebuilds `games` from both seasons and attaches each team's pre-game ELO.
# NOTE(review): the original read `games_2014` / `games_2015`, which no visible
# cell defines (NameError on a fresh kernel). They are assumed to be the
# per-season frames returned by sched_2014.get_games() / sched_2015.get_games(),
# as used when `games` is first built -- confirm. This cell also repeats the
# earlier "Joins ELO data back to games" step and overwrites `games`, discarding
# any filtering applied to it since; consider deleting it if it is a leftover.
games = pd.concat(
    [sched_2014.get_games(), sched_2015.get_games()],
    ignore_index=True,
)
# Rating used when no ELO row exists before the game date.
DEFAULT_ELO = 1500
home_elos = []
away_elos = []
for _, game in games.iterrows():
    # Ratings recorded strictly before this game's date.
    # NOTE(review): assumes elo_data is date-indexed in ascending order with
    # one column per team name -- confirm against ELO.create_elo.
    prior_elo = elo_data[elo_data.index < game['GAME_DATE']]
    if len(prior_elo) == 0:
        home_elos.append(DEFAULT_ELO)
        away_elos.append(DEFAULT_ELO)
    else:
        # .iloc[-1] replaces `.tail(1)[team][0]`, which depended on positional
        # fallback for `[0]` on a label-indexed Series.
        home_elos.append(prior_elo[game['Home Team']].iloc[-1])
        away_elos.append(prior_elo[game['Away Team']].iloc[-1])
# Whole-column assignment replaces per-cell DataFrame.set_value (removed in
# pandas 1.0); iterrows() preserves row order, so the lists align with the rows.
games['H_ELO'] = home_elos
games['A_ELO'] = away_elos
In [ ]:
#Joins stats data with ELO data
# The original draft selected games[['']] (a placeholder that raises KeyError)
# and noted that a join key first had to be passed through the last-n stats.
# 'Game_ID' is used as that key here, matching the earlier
# "Merges Last 5 data and ELO data" cell.
# NOTE(review): assumes stats_5 carries a 'Game_ID' column (it is listed as a
# pass-through column when Stats is constructed) and that its dtype after the
# CSV reload matches games['Game_ID'] -- confirm.
data_5 = pd.merge(stats_5, games[['Game_ID', 'H_ELO', 'A_ELO']], on='Game_ID')