In [1]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import sqlite3
import statsmodels.api as sm
%matplotlib inline
In [2]:
# Open the local SQLite database that backs this notebook.
con = sqlite3.connect("nflPPdb.sqlite")

# Read each table in full into its own DataFrame; the unpacking order
# matches the order of the queries below.
players, combine, rr, passing = (
    pd.read_sql_query(query, con)
    for query in (
        'SELECT * FROM players',
        'SELECT * FROM combine',
        'SELECT * FROM rr',
        'SELECT * FROM passing',
    )
)
In [3]:
# Total yards per stat row: receiving plus rushing.
rr['total_yards'] = rr['receiving_yards'] + rr['rushing_yards']

# Keep quarterbacks and running backs only (upper- and lower-case position
# codes are both accepted).
ydsData = rr[rr['position'].isin(['QB', 'RB', 'qb', 'rb'])]

# Sum the yardage columns per player name; the result is indexed by name.
# - Columns are selected with a list: selecting with a bare tuple
#   (['a', 'b', 'c'] without the inner brackets) is rejected by current pandas.
# - 'name' is deliberately NOT set as the index beforehand: having it as both
#   an index level and a column makes groupby('name') ambiguous.
ydsData = ydsData.groupby('name')[['total_yards', 'receiving_yards', 'rushing_yards']].sum()

# Drop any player whose totals are missing.
ydsData = ydsData.dropna()

# No drop_duplicates() here: after the groupby every name appears exactly once,
# and de-duplicating on the yardage values would silently discard distinct
# players who happen to share identical totals.
In [4]:
# Combine measurements for quarterbacks and running backs only (upper- and
# lower-case position codes are both accepted).
combineData = combine[combine['position'].isin(['QB', 'RB', 'qb', 'rb'])]

# Index by player name and keep just the two predictors used in the models.
combineData = combineData.set_index('name')[['speedscore', 'BMI']]

# Keep the first row for each name. De-duplicating on the (speedscore, BMI)
# values instead would drop different players with equal measurements while
# still leaving repeated names in the index, and the later column-wise concat
# on name needs a unique index.
combineData = combineData[~combineData.index.duplicated(keep='first')]
In [5]:
# Build the regression table: take the first 120 rows of each frame, line them
# up side by side on their shared name index, and keep only complete rows.
# NOTE(review): the 120-row cap is positional and applied before alignment, so
# the overlap depends on row order in each frame -- confirm this is intended.
combine_head = combineData.iloc[:120]
yards_head = ydsData.iloc[:120]
regdata = pd.concat([combine_head, yards_head], axis=1).dropna()
regdata
Out[5]:
In [6]:
# Simple linear regression of speedscore (endog) on total_yards (exog).
# statsmodels' OLS does not add an intercept on its own, so a constant column
# is added explicitly; without it the fit is forced through the origin and the
# reported R-squared is the uncentered one. This also matches the
# two-predictor model further down, which adds a constant the same way.
# NOTE(review): the later model uses total_yards as the response; confirm that
# speedscore is really meant to be the dependent variable here.
mod = sm.OLS(regdata['speedscore'], sm.add_constant(regdata['total_yards']))
In [7]:
# Fit the model built in the previous cell and keep the results object.
res = mod.fit()
In [8]:
# Print the regression summary table. The call form of print works on both
# Python 2 and Python 3; the bare print statement is a SyntaxError on Python 3.
print(res.summary())
In [9]:
# Partial regression plot of speedscore against total_yards with no additional
# regressors; column names are resolved against regdata, and per-point labels
# are switched off.
sm.graphics.plot_partregress(
    endog='speedscore',
    exog_i='total_yards',
    exog_others=[],
    data=regdata,
    obs_labels=False,
)
Out[9]:
In [ ]:
# Two-predictor model: total_yards explained by speedscore and BMI.
Y = regdata['total_yards']

# Design matrix = the two predictors plus an explicit intercept column.
X = sm.add_constant(regdata[['speedscore', 'BMI']])

mod2 = sm.OLS(Y, X)
In [ ]:
# Fit the two-predictor model (speedscore + BMI, with intercept).
res2 = mod2.fit()
In [ ]:
# 100 evenly spaced values across the observed range of each predictor.
speed_axis = np.linspace(regdata['speedscore'].min(), regdata['speedscore'].max(), 100)
bmi_axis = np.linspace(regdata['BMI'].min(), regdata['BMI'].max(), 100)

# 2-D coordinate grids (speedscore along xx1, BMI along xx2) on which the
# fitted plane is evaluated.
xx1, xx2 = np.meshgrid(speed_axis, bmi_axis)
In [ ]:
# Display the fitted coefficients of the two-predictor model.
res2.params
In [ ]:
# Evaluate the fitted plane on the grid: intercept + b1 * speedscore + b2 * BMI.
# The coefficients are read by position with .iloc, in the same order as the
# columns of X (constant, speedscore, BMI). Plain params[0] on this
# label-indexed Series relies on integer-as-position fallback, which pandas
# has deprecated.
coef = res2.params
Z = coef.iloc[0] + coef.iloc[1] * xx1 + coef.iloc[2] * xx2
In [ ]:
# 3-D view of the fitted regression plane with the observations drawn around it.
fig = plt.figure(figsize=(12, 8))

# Create the 3-D axes through the figure. Instantiating Axes3D(fig, ...)
# directly no longer attaches the axes to the figure in current Matplotlib,
# which leaves the output blank. The camera angles are applied separately.
ax = fig.add_subplot(111, projection='3d')
ax.view_init(elev=15, azim=-115)

# Semi-transparent surface for the fitted plane.
surf = ax.plot_surface(xx1, xx2, Z, cmap=plt.cm.RdBu_r, alpha=0.6, linewidth=0)

# Residuals: observed total_yards minus the model prediction.
resid = Y - res2.predict(X)
above = resid >= 0   # observations on or above the plane
below = resid < 0    # observations under the plane

# Points on or above the plane in red, points below it in black.
ax.scatter(X.loc[above, 'speedscore'], X.loc[above, 'BMI'], Y[above], c='r',
           marker='o')
ax.scatter(X.loc[below, 'speedscore'], X.loc[below, 'BMI'], Y[below], color='black', alpha=1.0)

ax.set_xlabel('speedscore')
ax.set_ylabel('BMI')
ax.set_zlabel('Totalyards')
In [ ]:
# Display the residuals (observed total_yards minus the model's prediction)
# computed in the plotting cell above.
resid
In [ ]:
# Number of combine rows per college, counted on the table's 'index' column.
combine_sort = combine.groupby(['college'])['index'].count()

# Largest counts first.
combine_sort = combine_sort.sort_values(ascending=False)

# Bar chart of the ten colleges with the most rows. The plot kind must be
# lower-case: current pandas matches it case-sensitively and rejects 'Bar'.
combine_sort.head(10).plot(kind='bar')
In [ ]: