In [1]:
#Variable Analysis
In [1]:
#Import Packages
from Schedule.Schedule import Schedule
from Schedule.Stats import Stats
import pandas as pd
In [2]:
# Load the game schedules: one Schedule object per b_dt value
# (b_dt is presumably the begin date of the span to load -- confirm in Schedule).
sched_2014 = Schedule(b_dt='10/1/2014')
sched_2015 = Schedule(b_dt='10/1/2015')
In [4]:
# Add the four-factors data to both schedules.
# NOTE(review): presumably this is what produces the H_FF_* / A_FF_* columns that the
# later cells select from get_games() -- confirm in Schedule.add_four_factors.
sched_2014.add_four_factors()
sched_2015.add_four_factors()
Out[4]:
In [5]:
# Dunk data will be added much later; the call below stays disabled until then.
#sched_2014.add_dunk_data(log = True)
In [6]:
# Inspect which columns are available on the 2014 games frame.
sched_2014.get_games().columns
Out[6]:
In [37]:
# Build the per-game modelling frame for the 2014 schedule and derive its last-5 stats.
#
# Work on a copy so the derived column below is not written into the frame returned by
# sched_2014.get_games() (whether that frame is Schedule's own state is not visible here).
games_2014 = sched_2014.get_games().copy()

# Target variable: home points minus away points, computed column-wise.
games_2014['Pts_diff'] = games_2014['PTS_home'] - games_2014['PTS_away']

# Keep only the target, identifiers, and the four-factors columns for both sides.
games_2014 = games_2014[[
    'Pts_diff', 'PTS_home', 'PTS_away', 'GAME_DATE', 'WL', 'Home Team', 'Away Team',
    'H_FF_EFG', 'H_FF_ORB', 'H_FF_FTFGA', 'H_FF_TOV',
    'A_FF_EFG', 'A_FF_ORB', 'A_FF_FTFGA', 'A_FF_TOV',
]]

# index=str converts the index labels to strings; the columns get H_/A_ prefixed names.
games_2014 = games_2014.rename(
    index=str,
    columns={'PTS_away': 'A_PTS', 'PTS_home': 'H_PTS', 'WL': 'H_WL'},
)

# Encode the result as 0/1 flags: H_WL is 1 where WL == 'W', and A_WL is its complement.
games_2014['H_WL'] = (games_2014['H_WL'] == 'W').astype('int64')
games_2014['A_WL'] = 1 - games_2014['H_WL']

stats_2014 = Stats(games_2014, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff')
# NOTE(review): presumably aggregates each team's previous 5 games -- confirm in Schedule.Stats.
stats_5 = stats_2014.get_lastn_stats(5)
In [38]:
# Build the per-game modelling frame for the 2015 schedule, add its last-5 stats to
# stats_5, and persist the combined table to stats_5.csv.
#
# Work on a copy so the derived column below is not written into the frame returned by
# sched_2015.get_games() (whether that frame is Schedule's own state is not visible here).
games_2015 = sched_2015.get_games().copy()

# Target variable: home points minus away points, computed column-wise.
games_2015['Pts_diff'] = games_2015['PTS_home'] - games_2015['PTS_away']

# Keep only the target, identifiers, and the four-factors columns for both sides.
games_2015 = games_2015[[
    'Pts_diff', 'PTS_home', 'PTS_away', 'GAME_DATE', 'WL', 'Home Team', 'Away Team',
    'H_FF_EFG', 'H_FF_ORB', 'H_FF_FTFGA', 'H_FF_TOV',
    'A_FF_EFG', 'A_FF_ORB', 'A_FF_FTFGA', 'A_FF_TOV',
]]

# index=str converts the index labels to strings; the columns get H_/A_ prefixed names.
games_2015 = games_2015.rename(
    index=str,
    columns={'PTS_away': 'A_PTS', 'PTS_home': 'H_PTS', 'WL': 'H_WL'},
)

# Encode the result as 0/1 flags: H_WL is 1 where WL == 'W', and A_WL is its complement.
games_2015['H_WL'] = (games_2015['H_WL'] == 'W').astype('int64')
games_2015['A_WL'] = 1 - games_2015['H_WL']

stats_2015 = Stats(games_2015, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff')

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; row order and
# index handling are the same (assumes get_lastn_stats returns a DataFrame -- confirm).
# NOTE: stats_5 must already hold the 2014 rows; re-running this cell adds the 2015
# rows a second time.
stats_5 = pd.concat([stats_5, stats_2015.get_lastn_stats(5)])
stats_5.to_csv('stats_5.csv', index=False)
In [3]:
import numpy as np

# Reload the combined last-5 stats saved earlier, so this cell does not need the
# stats-building cells to be re-run.
stats_5 = pd.read_csv('stats_5.csv')

# Rebuild the 2015 modelling frame only to obtain a Stats object to call get_correl on.
# Work on a copy so the derived column below is not written into the frame returned by
# sched_2015.get_games() (whether that frame is Schedule's own state is not visible here).
games_2015 = sched_2015.get_games().copy()

# Target variable: home points minus away points, computed column-wise.
games_2015['Pts_diff'] = games_2015['PTS_home'] - games_2015['PTS_away']

# Keep only the target, identifiers, and the four-factors columns for both sides.
games_2015 = games_2015[[
    'Pts_diff', 'PTS_home', 'PTS_away', 'GAME_DATE', 'WL', 'Home Team', 'Away Team',
    'H_FF_EFG', 'H_FF_ORB', 'H_FF_FTFGA', 'H_FF_TOV',
    'A_FF_EFG', 'A_FF_ORB', 'A_FF_FTFGA', 'A_FF_TOV',
]]

# index=str converts the index labels to strings; the columns get H_/A_ prefixed names.
games_2015 = games_2015.rename(
    index=str,
    columns={'PTS_away': 'A_PTS', 'PTS_home': 'H_PTS', 'WL': 'H_WL'},
)

# Encode the result as 0/1 flags: H_WL is 1 where WL == 'W', and A_WL is its complement.
games_2015['H_WL'] = (games_2015['H_WL'] == 'W').astype('int64')
games_2015['A_WL'] = 1 - games_2015['H_WL']

stats_2015 = Stats(games_2015, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff')

# Disabled manual cross-check of a single correlation value:
# print(np.corrcoef(stats_5['Pts_diff'], stats_5['H_WL_5'])[0, 1])

# Last expression so the notebook displays the result.
# NOTE(review): presumably correlates each stats_5 column with Pts_diff -- confirm in Schedule.Stats.
stats_2015.get_correl(stats_5)
Out[3]:
In [12]:
from Regression.Reg_Model import Reg_Model

# Predictors are every stats_5 column except the target 'Pts_diff'.
x = stats_5.drop(labels='Pts_diff', axis=1)

# Hand the predictors and the target column to the model, then fit it.
model = Reg_Model()
model.set_training(x, stats_5.loc[:, 'Pts_diff'])
model.calc_model()
In [16]:
# Report which model type was chosen and its mean squared error.
# Single-argument print(...) produces the same output on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(model.model_type)
print(model.mse)
In [5]:
from Regression.Game_Scores_v2 import Game_Scores

# Predictors are every stats_5 column except the target 'Pts_diff'.
x = stats_5.drop(labels='Pts_diff', axis=1)

# Game_Scores receives the full table, the predictor frame, and the target column name.
gs = Game_Scores(stats_5, x, 'Pts_diff')
In [6]:
# Render the rank-order graph from the Game_Scores object
# (what exactly is plotted is defined in Regression.Game_Scores_v2, not shown here).
gs.create_rank_order_graph()
In [ ]:
In [41]:
import matplotlib.pyplot as plt

# Scatter of the target (Pts_diff, x-axis) against H_PTS_5 (y-axis).
# Axis labels and a title are set so the figure can be read without the code.
plt.scatter(stats_5['Pts_diff'], stats_5['H_PTS_5'])
plt.xlabel('Pts_diff')
plt.ylabel('H_PTS_5')
plt.title('H_PTS_5 vs Pts_diff')
plt.show()
In [42]:
# Scatter of the target (Pts_diff, x-axis) against H_O_PTS_5 (y-axis).
# Axis labels and a title are set so the figure can be read without the code.
plt.scatter(stats_5['Pts_diff'], stats_5['H_O_PTS_5'])
plt.xlabel('Pts_diff')
plt.ylabel('H_O_PTS_5')
plt.title('H_O_PTS_5 vs Pts_diff')
plt.show()
In [43]:
# Scatter of the target (Pts_diff, x-axis) against H_WL_5 (y-axis).
# Axis labels and a title are set so the figure can be read without the code.
plt.scatter(stats_5['Pts_diff'], stats_5['H_WL_5'])
plt.xlabel('Pts_diff')
plt.ylabel('H_WL_5')
plt.title('H_WL_5 vs Pts_diff')
plt.show()
In [30]:
# Scatter of the target (Pts_diff, x-axis) against H_FF_EFG_5 (y-axis).
# Axis labels and a title are set so the figure can be read without the code.
plt.scatter(stats_5['Pts_diff'], stats_5['H_FF_EFG_5'])
plt.xlabel('Pts_diff')
plt.ylabel('H_FF_EFG_5')
plt.title('H_FF_EFG_5 vs Pts_diff')
plt.show()
In [23]:
# List the columns of stats_5 to see which features are available to plot.
stats_5.columns
Out[23]:
In [ ]:
# So I don't see any strong indicators in these stats for home scores. I wonder whether I should be looking at the home-minus-away (H-A) score instead.