In [9]:
import pandas as pd
import numpy as np
import matplotlib

from scipy import stats, integrate

import matplotlib.pyplot as plt
%matplotlib inline
import cufflinks as cf

import plotly
plotly.offline.init_notebook_mode()
import plotly.offline as py
import plotly.graph_objs as go
from plotly.graph_objs import *

import seaborn as sns
sns.set(color_codes=True)

print(pd.__version__)


0.19.2

In [24]:
games = pd.read_csv('/Users/DanMoeller/git/ncaa-bball-attendance/data/big_east/big_east_2014_2016.csv',sep=",",header='infer')
team_list = pd.read_csv('/Users/DanMoeller/git/ncaa-bball-attendance/data/team_list.csv',sep=",",header='infer')

In [29]:
games = pd.merge(games,team_list,left_on='home_id',right_on='id')
# del games['id']
# del games['url_name']
# games.rename(index=str, columns={"school": "home_school", "conf": "home_conf"})




games.head(5)


Out[29]:
away_wins attendance capacity home_id home_opener home_losses away_win_pct away_losses home_win_pct home_score ... id_x conf_x school_y url_name_y id_y conf_y school url_name id conf
0 0 16859 15500 156 0 0 0.000 1 1.000 96 ... 156 big_east creighton creighton-bluejays 156 big_east creighton creighton-bluejays 156 big_east
1 0 18078 15500 156 0 0 0.000 3 1.000 82 ... 156 big_east creighton creighton-bluejays 156 big_east creighton creighton-bluejays 156 big_east
2 6 17530 15500 156 0 2 0.750 2 0.750 82 ... 156 big_east creighton creighton-bluejays 156 big_east creighton creighton-bluejays 156 big_east
3 2 16303 15500 156 0 2 0.222 7 0.778 88 ... 156 big_east creighton creighton-bluejays 156 big_east creighton creighton-bluejays 156 big_east
4 8 17533 15500 156 0 2 0.727 3 0.800 68 ... 156 big_east creighton creighton-bluejays 156 big_east creighton creighton-bluejays 156 big_east

5 rows × 34 columns


In [25]:
games.head(5)


Out[25]:
away_wins attendance capacity home_id home_opener home_losses away_win_pct away_losses home_win_pct home_score ... game_id line day away_id home_rank home_wins scoring_line time away_score tv_coverage
0 0 16859 15500 156 0 0 0.000 1 1.000 96 ... 400499763 0.0 tuesday 140 0 1 NaN 1:00 70 fs1
1 2 4200 4200 2603 0 0 1.000 0 1.000 79 ... 400504465 3.0 sunday 156 0 2 NaN 0:00 83 NaN
2 0 18078 15500 156 0 0 0.000 3 1.000 82 ... 400499278 0.0 saturday 202 0 3 NaN 20:30 72 fsn
3 5 3481 4200 299 0 7 0.714 2 0.125 61 ... 400502942 13.0 wednesday 156 0 1 NaN 3:00 78 NaN
4 6 17530 15500 156 0 2 0.750 2 0.750 82 ... 400504490 -12.5 sunday 158 0 6 NaN 23:07 67 fs1

5 rows × 22 columns


In [12]:
games.describe()


Out[12]:
away_wins attendance capacity home_id home_opener home_losses away_win_pct away_losses home_win_pct home_score away_rank game_id line away_id home_rank home_wins scoring_line away_score
count 533.000000 533.000000 533.000000 533.000000 533.000000 533.000000 533.000000 533.000000 533.000000 533.000000 533.000000 5.330000e+02 533.000000 533.000000 533.000000 533.000000 157.000000 533.000000
mean 8.909944 9743.247655 14184.435272 1241.365854 0.043152 4.615385 0.601780 5.052533 0.679454 74.395872 4.497186 4.006500e+08 -6.312383 1308.727955 0.476548 9.294559 144.987261 67.060038
std 6.366276 4555.379015 5221.704607 1153.232948 0.203390 4.440533 0.261039 4.253377 0.237606 11.897390 7.295746 1.411536e+05 9.284986 1157.007260 2.600230 6.031053 6.858447 11.839471
min 0.000000 754.000000 2500.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 35.000000 0.000000 4.004969e+08 -35.000000 2.000000 0.000000 0.000000 129.000000 37.000000
25% 3.000000 6500.000000 9493.000000 222.000000 0.000000 1.000000 0.455000 2.000000 0.567000 67.000000 0.000000 4.005046e+08 -12.500000 222.000000 0.000000 5.000000 140.000000 58.000000
50% 8.000000 8600.000000 15500.000000 305.000000 0.000000 3.000000 0.643000 4.000000 0.688000 74.000000 0.000000 4.005884e+08 -5.500000 333.000000 0.000000 9.000000 145.000000 66.000000
75% 13.000000 12736.000000 18717.000000 2550.000000 0.000000 7.000000 0.778000 7.000000 0.857000 83.000000 7.000000 4.008278e+08 0.000000 2550.000000 0.000000 13.000000 150.000000 75.000000
max 27.000000 28135.000000 33000.000000 2752.000000 1.000000 22.000000 1.000000 22.000000 1.000000 114.000000 25.000000 4.008723e+08 20.000000 2916.000000 24.000000 29.000000 166.000000 99.000000

In [14]:
games.dtypes


Out[14]:
away_wins         int64
attendance        int64
capacity          int64
home_id           int64
home_opener       int64
home_losses       int64
away_win_pct    float64
away_losses       int64
home_win_pct    float64
home_score        int64
away_rank         int64
date             object
game_id           int64
line            float64
day              object
away_id           int64
home_rank         int64
home_wins         int64
scoring_line    float64
time             object
away_score        int64
tv_coverage      object
dtype: object

In [15]:
games.columns


Out[15]:
Index([u'away_wins', u'attendance', u'capacity', u'home_id', u'home_opener',
       u'home_losses', u'away_win_pct', u'away_losses', u'home_win_pct',
       u'home_score', u'away_rank', u'date', u'game_id', u'line', u'day',
       u'away_id', u'home_rank', u'home_wins', u'scoring_line', u'time',
       u'away_score', u'tv_coverage'],
      dtype='object')

In [23]:



Out[23]:
attendance   NaN
capacity     NaN
Name: attendance, dtype: float64

In [ ]: