In [2]:
# Data handling and numerics.
import pandas as pd
import numpy as np
import matplotlib
from scipy import stats, integrate
import matplotlib.pyplot as plt
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
import cufflinks as cf
import plotly
# Switch plotly to its offline notebook mode before any plotly figure is made.
plotly.offline.init_notebook_mode()
import plotly.offline as py
import plotly.graph_objs as go
# NOTE(review): wildcard import hides where names come from and can shadow
# existing ones; `go` above already exposes the same module.
from plotly.graph_objs import *
import seaborn as sns
# Apply seaborn's default styling; color_codes=True remaps matplotlib's
# single-letter colour codes (such as "b" and "k") to the seaborn palette.
sns.set(color_codes=True)
# Print the pandas version this run used.
print(pd.__version__)
# NOTE(review): np, matplotlib, stats, integrate, cf, py and go are not
# referenced in the cells visible here -- confirm before pruning.
In [1]:
# Load the Big East 2014-2016 CSV into `df`.
# The location defaults to the original author's checkout, but exporting
# BIG_EAST_CSV overrides it so the notebook also runs on machines where that
# absolute path does not exist.
import os

BIG_EAST_CSV = os.environ.get(
    "BIG_EAST_CSV",
    '/Users/DanMoeller/git/ncaa-bball-attendance/data/big_east/big_east_2014_2016.csv',
)
# sep="," and header="infer" are read_csv's defaults, so they are not repeated.
df = pd.read_csv(BIG_EAST_CSV)
In [37]:
# Preview the first rows of the data (head() shows five rows by default).
df.head()
Out[37]:
In [38]:
# Preview the last rows of the data (tail() shows five rows by default).
df.tail()
Out[38]:
In [39]:
# Summary statistics for the columns of df.
df.describe()
Out[39]:
In [40]:
# Per-column dtypes; no dtype was passed to read_csv, so these are inferred.
df.dtypes
Out[40]:
In [41]:
# Column labels available in df.
df.columns
Out[41]:
In [42]:
# Attendance and capacity columns of every game in the file, as plain arrays.
attend = df["attendance"].values
cap = df["capacity"].values
In [43]:
# Attendance against capacity for every game in the 2014-16 Big East data.
# Each vertical line of points is one team's set of home games.
fig, ax = plt.subplots()
ax.plot(cap, attend, "bo")
ax.set_xlabel("Stadium Capacity")
ax.set_ylabel("Game Attendance")
plt.show()
In [44]:
# Restrict the data to Big East conference games: both the home side and the
# away side must be one of the ten team ids below.
big_east_teams = [
    305,   # DePaul
    2086,  # Butler
    222,   # Villanova
    46,    # Georgetown
    2507,  # Providence
    269,   # Marquette
    2550,  # Seton Hall
    2752,  # Xavier
    2599,  # St John's
    156,   # Creighton
]
home_is_big_east = df["home_id"].isin(big_east_teams)
away_is_big_east = df["away_id"].isin(big_east_teams)
df_conf = df[home_is_big_east & away_is_big_east]
In [45]:
# One frame of conference home games per team, selected by home_id.
conf_home_ids = df_conf["home_id"]
df_depaul = df_conf.loc[conf_home_ids.eq(305)]
df_butler = df_conf.loc[conf_home_ids.eq(2086)]
df_villanova = df_conf.loc[conf_home_ids.eq(222)]
df_georgetown = df_conf.loc[conf_home_ids.eq(46)]
df_providence = df_conf.loc[conf_home_ids.eq(2507)]
df_marquette = df_conf.loc[conf_home_ids.eq(269)]
df_seton_hall = df_conf.loc[conf_home_ids.eq(2550)]
df_xavier = df_conf.loc[conf_home_ids.eq(2752)]
df_st_johns = df_conf.loc[conf_home_ids.eq(2599)]
df_creighton = df_conf.loc[conf_home_ids.eq(156)]
In [46]:
# Conference-only arrays. Note that `attend` and `cap` are rebound here: from
# this cell on they describe df_conf, not the full df.
attend, cap, game, line = (
    df_conf[column].values
    for column in ("attendance", "capacity", "game_id", "line")
)
# Fraction of capacity filled per game (1.0 means attendance equals capacity).
pct_full = attend / cap
In [47]:
# Share of capacity filled against the betting line, one colour per home team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]
x_column = "line"

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(team_games[x_column].values, share_full, "o",
            color=team_color, label=team_label)
ax.set_xlabel("Betting Line")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [48]:
# Share of capacity filled against the away team's win percentage, one colour
# per home team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]
x_column = "away_win_pct"

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(team_games[x_column].values, share_full, "o",
            color=team_color, label=team_label)
ax.set_xlabel("Away Win Percentage")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [49]:
# Share of capacity filled against the home team's win percentage, one colour
# per home team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]
x_column = "home_win_pct"

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(team_games[x_column].values, share_full, "o",
            color=team_color, label=team_label)
ax.set_xlabel("Home Win Percentage")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [50]:
# Share of capacity filled against the mean of the two teams' win percentages,
# one colour per home team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Simple average of the home and away win percentages for each game.
    avg_win_pct = (team_games.home_win_pct.values + team_games.away_win_pct.values) / 2
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(avg_win_pct, share_full, "o", color=team_color, label=team_label)
ax.set_xlabel("Average Win Percentage")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [51]:
# Share of capacity filled against the away team's rank, one colour per home
# team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]
x_column = "away_rank"

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(team_games[x_column].values, share_full, "o",
            color=team_color, label=team_label)
ax.set_xlabel("Away Rank")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [52]:
# Share of capacity filled against the home team's rank, one colour per home
# team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]
x_column = "home_rank"

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(team_games[x_column].values, share_full, "o",
            color=team_color, label=team_label)
ax.set_xlabel("Home Rank")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [53]:
# Share of capacity filled against the scoring line, one colour per home team.
# Each entry: (that team's conference home games, marker colour, legend label).
team_series = [
    (df_depaul, '#00558c', 'DePaul'),
    (df_butler, '#13294b', 'Butler'),
    (df_villanova, '#001F5B', 'Villanova'),
    (df_georgetown, '#011e41', 'Georgetown'),
    (df_providence, 'k', 'Providence'),
    (df_marquette, '#FFCC00', 'Marquette'),
    (df_seton_hall, '#004488', 'Seton_Hall'),
    (df_xavier, '#002857', 'Xavier'),
    (df_st_johns, '#CF102D', 'St Johns'),
    (df_creighton, '#3c4982', 'Creighton'),
]
x_column = "scoring_line"

fig, ax = plt.subplots()
for team_games, team_color, team_label in team_series:
    # Fraction of capacity filled: 1.0 means attendance equals capacity.
    share_full = team_games.attendance.values / team_games.capacity.values
    ax.plot(team_games[x_column].values, share_full, "o",
            color=team_color, label=team_label)
ax.set_xlabel("Scoring Line")
ax.set_ylabel("Percent Full")
# Without a legend the per-team labels are never shown and the many similar
# blues cannot be told apart; it sits beside the axes so no points are hidden.
ax.legend(title="Home team", loc="center left", bbox_to_anchor=(1.02, 0.5))
plt.show()
In [ ]: