In [1]:
import pandas as pd
import numpy as np
import matplotlib
from scipy import stats, integrate
import matplotlib.pyplot as plt
%matplotlib inline
import cufflinks as cf
import plotly
plotly.offline.init_notebook_mode()
import plotly.offline as py
import plotly.graph_objs as go
from plotly.graph_objs import *
import seaborn as sns
sns.set(color_codes=True)
print(pd.__version__)
In [6]:
# Load the Big 12 games CSV (file name suggests seasons 2014-2016) into `df`.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file location exists.
big_12_csv_path = '/Users/DanMoeller/git/ncaa-bball-attendance/data/big_12/big_12_2014_2016.csv'
# sep="," and header="infer" are the read_csv defaults, so they are not repeated.
df = pd.read_csv(big_12_csv_path)
In [7]:
# Preview the first rows of the loaded data (head() defaults to 5 rows).
df.head()
Out[7]:
In [8]:
# Preview the last rows of the loaded data (tail() defaults to 5 rows).
df.tail()
Out[8]:
In [9]:
# Summary statistics for the columns of `df`.
df.describe()
Out[9]:
In [10]:
# Show the dtype pandas assigned to each column when reading the CSV.
df.dtypes
Out[10]:
In [11]:
# List the column names available in `df`.
df.columns
Out[11]:
In [12]:
# Raw arrays of per-game attendance and arena capacity for every row of `df`.
attend, cap = df["attendance"].values, df["capacity"].values
In [13]:
# Every game in the loaded Big 12 data set: arena capacity vs. game attendance.
# Blue circle markers with no connecting line (same as the "bo" format string).
plt.plot(cap, attend, color="b", marker="o", linestyle="None")
plt.xlabel("Stadium Capacity")
plt.ylabel("Game Attendance")
plt.show()
# Each vertical line of points is one team's home games
# (presumably because every home arena has a single capacity value).
In [15]:
# Keep only Big 12 conference games: both the home team and the away team
# must be one of these ten team ids.
big_12_teams = [239, 66, 2305, 2306, 201, 197, 2628, 251, 2641, 277]
home_in_conf = df["home_id"].isin(big_12_teams)
away_in_conf = df["away_id"].isin(big_12_teams)
df_conf = df[home_in_conf & away_in_conf]
In [27]:
# One DataFrame of home games per team, selected by home_id from the full data
# set `df` (all opponents). To restrict to conference games only, select from
# `df_conf` instead of `df`.
(
    df_baylor,
    df_iowa_state,
    df_kansas,
    df_kansas_state,
    df_oklahoma,
    df_ok_state,
    df_tcu,
    df_texas,
    df_texas_tech,
    df_west_virginia,
) = (
    df[df.home_id == team_id]
    for team_id in (
        239,   # Baylor
        66,    # Iowa State
        2305,  # Kansas
        2306,  # Kansas State
        201,   # Oklahoma
        197,   # Oklahoma State
        2628,  # TCU
        251,   # Texas
        2641,  # Texas Tech
        277,   # West Virginia
    )
)
In [28]:
# Column arrays for conference games only (rows of `df_conf`).
# Note: `attend` and `cap` are rebound here from their earlier all-games values.
attend = df_conf["attendance"].values
cap = df_conf["capacity"].values
game = df_conf["game_id"].values
line = df_conf["line"].values
# Fraction of arena capacity filled for each conference game.
pct_full = attend / cap
In [29]:
# Betting line vs. share of arena capacity filled, one marker series per home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(team_games.line.values, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Betting Line")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [30]:
# Away team's win percentage vs. share of arena capacity filled, one marker
# series per home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(team_games.away_win_pct.values, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Away Win Percentage")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [31]:
# Home team's win percentage vs. share of arena capacity filled, one marker
# series per home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(team_games.home_win_pct.values, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Home Win Percentage")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [32]:
# Mean of the home and away win percentages vs. share of arena capacity
# filled, one marker series per home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # Per-game average of the two teams' win percentages.
    avg_win_pct = (team_games.home_win_pct.values + team_games.away_win_pct.values) / 2
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(avg_win_pct, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Average Win Percentage")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [33]:
# Away team's rank vs. share of arena capacity filled, one marker series per
# home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(team_games.away_rank.values, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Away Rank")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [34]:
# Home team's rank vs. share of arena capacity filled, one marker series per
# home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(team_games.home_rank.values, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Home Rank")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [35]:
# Scoring line vs. share of arena capacity filled, one marker series per home team.
# Each entry is (home-game frame, marker color, legend label).
team_series = (
    (df_baylor, '#00558c', 'Baylor'),
    (df_iowa_state, '#13294b', 'Iowa State'),
    (df_kansas, '#001F5B', 'Kansas'),
    (df_kansas_state, '#011e41', 'Kansas St'),
    (df_oklahoma, 'k', 'Oklahoma'),
    (df_ok_state, '#FFCC00', 'Ok State'),
    (df_tcu, '#004488', 'TCU'),
    (df_texas, '#002857', 'Texas'),
    (df_texas_tech, '#CF102D', 'Texas Tech'),
    (df_west_virginia, '#3c4982', 'West Virginia'),
)
for team_games, team_color, team_label in team_series:
    # attendance / capacity is a fraction: 1.0 means attendance equals capacity.
    fill_ratio = team_games.attendance.values / team_games.capacity.values
    plt.plot(team_games.scoring_line.values, fill_ratio, "o", color=team_color, label=team_label)
plt.xlabel("Scoring Line")
plt.ylabel("Percent Full")
# The label= arguments are only rendered once a legend is requested; it is
# placed outside the axes so it does not cover the points.
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]: