Stat Category | Point Value |
---|---|
Passing Yards | 1 point for every 25 yards |
Passing TDs | 6 points |
Rushing Yards | 1 point for every 10 yards |
Rushing TDs | 6 points |
Receiving Yards | 1 point for every 10 yards |
Receiving TDs | 6 points |
Interceptions | -2 points |
In [1]:
%matplotlib inline
import pandas as pd
import matplotlib as mp
import numpy as np
import seaborn as sns
In [2]:
# Load the yearly player stats CSV into the frame used by the rest of the notebook.
football_db = pd.read_csv('FootballDB2016YearlyStats.csv')
# Show the column names so the available stat fields can be checked.
football_db.columns.values
Out[2]:
In [3]:
# Ratio of the provided fantasy score to the calculated one (1.0 means they agree).
football_db['FantasyVariance'] = football_db['FantasyPts'] / football_db['CalcFantasyPoints']
# Correct any divide by 0 errors. Series.replace returns a new Series, so the
# result must be assigned back; otherwise the +/-inf values stay in the column
# and distort the statistics and plots below.
football_db['FantasyVariance'] = football_db['FantasyVariance'].replace([np.inf, -np.inf], 0)
print('Done')
In [4]:
# Write the frame, including the FantasyVariance column added above, to
# Checkdata.csv (presumably for manual inspection of the values).
football_db.to_csv('Checkdata.csv')
In [5]:
# Mean and median of the provided/calculated ratio across every player.
fantasy_mean, fantasy_median = (
    football_db['FantasyVariance'].mean(),
    football_db['FantasyVariance'].median(),
)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
In [6]:
#
In [7]:
# Global seaborn look for this figure.
sns.set_context('poster')
sns.set_style("ticks")

# Histogram plus KDE of the provided/calculated ratio for all players.
x = football_db.FantasyVariance
g = sns.distplot(
    x,
    kde_kws=dict(color="g", lw=4, label="KDE Estim", alpha=0.5),
    hist_kws=dict(color="r", alpha=0.3, label="Freq"),
)

# Drop the top and right spines.
sns.despine()

# Figure size in inches.
g.figure.set_size_inches(12, 7)

# Title and axis labels.
g.axes.set_title(
    'Provided and Calculated Fantasy Point Variance Distribution',
    fontsize=34,
    color="b",
    alpha=0.3,
)
g.set_xlabel("Variance", size=67, color="g", alpha=0.5)
g.set_ylabel("Density", size=67, color="r", alpha=0.5)

# Tick label size and colour.
g.tick_params(labelsize=14, labelcolor="black")
In [8]:
# Quarterbacks only: summarise and plot the provided/calculated ratio.
qb = football_db[football_db['Pos'] == 'QB']
x = qb.FantasyVariance
fantasy_mean, fantasy_median, fantasy_std = x.mean(), x.median(), np.std(x)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
print('The standard deviation for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_std))
g = sns.distplot(x)
In [28]:
# Running backs only: summarise and plot the provided/calculated ratio.
rb = football_db[football_db['Pos'] == 'RB']
print(type(rb))
x = rb.FantasyVariance
fantasy_mean, fantasy_median, fantasy_std = x.mean(), x.median(), np.std(x)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
print('The standard deviation for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_std))
g = sns.distplot(x)
In [10]:
# Wide receivers only: summarise and plot the provided/calculated ratio.
wr = football_db[football_db['Pos'] == 'WR']
x = wr.FantasyVariance
fantasy_mean, fantasy_median, fantasy_std = x.mean(), x.median(), np.std(x)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
print('The standard deviation for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_std))
g = sns.distplot(
    x,
    kde_kws=dict(color="g", lw=4, label="KDE Estim", alpha=0.5),
    hist_kws=dict(color="r", alpha=0.3, label="Freq"),
)
If a 'Name' column already exists in Checkdata.csv, this step can be skipped. It takes the 'Player' column scraped from the footballdb.com website, which has the format "Aaron Rodgers, GB", and writes a 'Name' column in the format "rodgers, aaron" so that it matches the Name field in the game-by-game data.
In [11]:
# Build the join key from each Player string: the second whitespace-separated
# token (lower-cased) followed by a space and the first token (lower-cased).
# NOTE(review): only the first two tokens are used, so a single-token Player
# value raises IndexError and names with more than two words before the team
# code lose everything past the second token - confirm the scraped data never
# contains such names.
name = [
    tokens[1].lower() + ' ' + tokens[0].lower()
    for tokens in (player.split() for player in football_db['Player'])
]
football_db['Name'] = name
football_db.to_csv('Checkdata.csv')
print('Done!')
In [12]:
# Game-by-game quarterback stats with a fantasy score computed per row.
qb_games = pd.read_csv('qb_games.csv')
qb_games['FantasyPoints'] = (
    (qb_games['Pass Yds'] / 25)       # 1 point per 25 passing yards
    + (6 * qb_games['Pass TD'])       # 6 points per passing touchdown
    - (2 * qb_games['Pass Int'])      # -2 points per interception
    + (qb_games['Rush Yds'] / 10)     # 1 point per 10 rushing yards
    + (6 * qb_games['Rush TD'])       # 6 points per rushing touchdown
)
qb_games.columns.values
Out[12]:
In [13]:
# Yearly-stats rows for quarterbacks; preview the first ten.
footballdb_qb = football_db[football_db['Pos'] == 'QB']
footballdb_qb.head(10)
Out[13]:
In [14]:
# 2016 game rows reduced to the join key and the per-game score.
qb_games2016 = qb_games.loc[qb_games['Year'] == 2016, ['Name', 'FantasyPoints']]
# Matching two-column view of the yearly quarterback data.
football_db_qb = footballdb_qb[['Name', 'CalcFantasyPoints']]
# Season total per quarterback.
qb_games2016_sum = qb_games2016.groupby('Name', as_index=False).sum()
In [15]:
# Preview the first ten rows of the Name / CalcFantasyPoints quarterback frame.
football_db_qb.head(10)
Out[15]:
In [16]:
# Inner join of the summed game scores with the yearly data on Name; the two
# printed lengths show how many summed rows found a match.
qb_merged = pd.merge(qb_games2016_sum, football_db_qb, on='Name')
print(len(qb_games2016_sum))
print(len(qb_merged))
In [17]:
# Ratio of summed per-game points to the yearly calculated points for QBs.
qb_var = qb_merged['FantasyPoints'] / qb_merged['CalcFantasyPoints']
fantasy_mean, fantasy_median, fantasy_std = qb_var.mean(), qb_var.median(), np.std(qb_var)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
print('The standard deviation for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_std))
g = sns.distplot(
    qb_var,
    kde_kws=dict(color="g", lw=4, label="KDE Estim", alpha=0.5),
    hist_kws=dict(color="r", alpha=0.3, label="Freq"),
)
In [18]:
# Game-by-game running back stats with a fantasy score per row:
# 1 point per 10 rushing/receiving yards, 6 points per rushing/receiving TD.
rb_games = pd.read_csv('rb_games.csv')
rb_games['FantasyPoints'] = (
    ((rb_games['Rush Yds'] + rb_games['Rec Yds']) / 10)
    + ((rb_games['Rush TD'] + rb_games['Rec TD']) * 6)
)

# Yearly-stats rows for running backs, and how many there are.
footballdb_rb = football_db[football_db['Pos'] == 'RB']
print(len(footballdb_rb))
football_db_rb = footballdb_rb[['Name', 'CalcFantasyPoints']]

# 2016 game rows summed to a season total per player.
rb_games2016 = rb_games.loc[rb_games['Year'] == 2016, ['Name', 'FantasyPoints']]
rb_games2016_sum = rb_games2016.groupby('Name', as_index=False).sum()
rb_games2016_sum.head(32)
Out[18]:
In [19]:
# Inner join of the summed game scores with the yearly data on Name; the two
# printed lengths show how many summed rows found a match.
rb_merged = pd.merge(rb_games2016_sum, football_db_rb, on='Name')
print(len(rb_games2016_sum))
print(len(rb_merged))
In [20]:
# Ratio of summed per-game points to the yearly calculated points for RBs.
rb_var = rb_merged['FantasyPoints'] / rb_merged['CalcFantasyPoints']
fantasy_mean, fantasy_median, fantasy_std = rb_var.mean(), rb_var.median(), np.std(rb_var)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
print('The standard deviation for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_std))
g = sns.distplot(
    rb_var,
    kde_kws=dict(color="g", lw=4, label="KDE Estim", alpha=0.5),
    hist_kws=dict(color="r", alpha=0.3, label="Freq"),
)
In [21]:
# Game-by-game wide receiver stats with a fantasy score per row:
# 1 point per 10 rushing/receiving yards, 6 points per rushing/receiving TD.
wr_games = pd.read_csv('wr_games.csv')
wr_games['FantasyPoints'] = (
    ((wr_games['Rush Yds'] + wr_games['Rec Yds']) / 10)
    + ((wr_games['Rush TD'] + wr_games['Rec TD']) * 6)
)

# Yearly-stats rows for wide receivers; print the count, then keep two columns.
footballdb_wr = football_db[football_db['Pos'] == 'WR']
print(len(footballdb_wr))
footballdb_wr = footballdb_wr[['Name', 'CalcFantasyPoints']]

# 2016 game rows summed to a season total per player.
wr_games2016 = wr_games.loc[wr_games['Year'] == 2016, ['Name', 'FantasyPoints']]
wr_games2016_sum = wr_games2016.groupby('Name', as_index=False).sum()
print(len(wr_games2016_sum))
wr_games2016_sum.head(32)
Out[21]:
In [22]:
# Inner join of the summed game scores with the yearly data on Name; the two
# printed lengths show how many summed rows found a match.
wr_merged = pd.merge(wr_games2016_sum, footballdb_wr, on='Name')
print(len(wr_games2016_sum))
print(len(wr_merged))
In [23]:
# Ratio of summed per-game points to the yearly calculated points for WRs.
wr_var = wr_merged['FantasyPoints'] / wr_merged['CalcFantasyPoints']
fantasy_mean, fantasy_median, fantasy_std = wr_var.mean(), wr_var.median(), np.std(wr_var)
print('The mean for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_mean))
print('The median for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_median))
print('The standard deviation for the variance between provided score and the calculated score is: {0:.4f}.'.format(fantasy_std))
g = sns.distplot(
    wr_var,
    kde_kws=dict(color="g", lw=4, label="KDE Estim", alpha=0.5),
    hist_kws=dict(color="r", alpha=0.3, label="Freq"),
)
In [ ]:
In [235]:
# One row per quarterback season. Later cells plot and filter qb_by_year on
# 'Pass Att' and 'Pass Rate', so those columns must survive the aggregation;
# selecting only FantasyPoints makes those cells fail with KeyError on a
# fresh kernel.
# NOTE(review): attempts are summed (season total) and passer rating is
# averaged over games - confirm this matches the intent of the `> 20` and
# `> 55` thresholds used further down.
qb_by_year = qb_games.groupby(['Name', 'Year', 'Career Year'], as_index=False).agg(
    {'FantasyPoints': 'sum', 'Pass Att': 'sum', 'Pass Rate': 'mean'}
)
# Per-game average, assuming 16 games in a season.
qb_by_year['FantasyMean'] = qb_by_year.FantasyPoints / 16

# Top 20 quarterback seasons of 2013 by total fantasy points.
qb2013 = qb_by_year.loc[qb_by_year.Year == 2013].sort_values(['FantasyPoints'], ascending=False)
qb2013 = qb2013.head(20)
qb2013_fp_mean = qb2013.FantasyPoints.mean()
qb2013_fm_mean = qb2013.FantasyMean.mean()
print('The 2013 fantasy score mean is: %d' %(qb2013_fp_mean))
print('The 2013 fantasy mean is: %d' %(qb2013_fm_mean))
In [236]:
# Top 20 quarterback seasons of 2014 by total fantasy points, and their means.
qb2014 = (
    qb_by_year[qb_by_year['Year'] == 2014]
    .sort_values('FantasyPoints', ascending=False)
    .head(20)
)
qb2014_fp_mean = qb2014['FantasyPoints'].mean()
qb2014_fm_mean = qb2014['FantasyMean'].mean()
print('The 2014 fantasy score mean is: %d' %(qb2014_fp_mean))
print('The 2014 fantasy mean is: %d' %(qb2014_fm_mean))
In [229]:
# Top 20 quarterback seasons of 2015 by total fantasy points.
# The frame is cut to the top 20 BEFORE the means are taken, as in the 2013
# and 2014 cells; averaging over every quarterback made the 2015 figures
# incomparable with those years.
qb2015 = qb_by_year.loc[qb_by_year.Year == 2015].sort_values(['FantasyPoints'], ascending=False)
qb2015 = qb2015.head(20)
qb2015_fp_mean = qb2015.FantasyPoints.mean()
qb2015_fm_mean = qb2015.FantasyMean.mean()
print('The 2015 fantasy score mean is: %d' %(qb2015_fp_mean))
print('The 2015 fantasy mean is: %d' %(qb2015_fm_mean))
qb2015.head(20)
Out[229]:
In [234]:
# Top 20 quarterback seasons of 2016 by total fantasy points.
# The frame is cut to the top 20 BEFORE the means are taken, as in the 2013
# and 2014 cells; averaging over every quarterback made the 2016 figures
# incomparable with those years.
qb2016 = qb_by_year.loc[qb_by_year.Year == 2016].sort_values(['FantasyPoints'], ascending=False)
qb2016 = qb2016.head(20)
qb2016_fp_mean = qb2016.FantasyPoints.mean()
qb2016_fm_mean = qb2016.FantasyMean.mean()
print('The 2016 fantasy score mean is: %d' %(qb2016_fp_mean))
print('The 2016 fantasy mean is: %d' %(qb2016_fm_mean))
qb2016.head(20)
Out[234]:
In [177]:
# Regression plot: 'Pass Att' against career year, one point per QB season.
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.1)
sns.lmplot(x='Career Year', y='Pass Att', data=qb_by_year)
Out[177]:
In [ ]:
In [176]:
# Regression plot: 'Pass Rate' against career year, one point per QB season.
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.1)
sns.lmplot(x='Career Year', y='Pass Rate', data=qb_by_year)
Out[176]:
In [172]:
# Regression plot: season fantasy points against career year.
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.1)
sns.lmplot(x='Career Year', y='FantasyPoints', data=qb_by_year)
Out[172]:
In [178]:
# Keep QB seasons whose 'Pass Att' exceeds 20, then plot fantasy points
# against career year for that subset.
qb_min_passes_by_year = qb_by_year[qb_by_year['Pass Att'] > 20]
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.1)
sns.lmplot(x='Career Year', y='FantasyPoints', data=qb_min_passes_by_year)
Out[178]:
In [179]:
# Keep QB seasons whose 'Pass Rate' exceeds 55, then plot fantasy points
# against career year for that subset.
qb_min_rate_by_year = qb_by_year.loc[qb_by_year['Pass Rate'] > 55]
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")
# Plot the frame built in this cell; the original passed
# qb_min_passes_by_year, which simply redrew the minimum-attempts figure.
sns.lmplot(y='FantasyPoints', x='Career Year', data=qb_min_rate_by_year)
Out[179]:
In [ ]:
# QB seasons passing all three thresholds: 'Pass Rate' > 55, 'Pass Att' > 20
# and FantasyPoints > 20.
# NOTE(review): this unexecuted cell repeats the first statement of the cell
# that follows it; one of the two can probably be deleted.
qb_min_comb_by_year = qb_by_year.loc[(qb_by_year['Pass Rate'] > 55) & (qb_by_year['Pass Att'] > 20) & (qb_by_year['FantasyPoints'] > 20)]
In [183]:
# QB seasons passing all three thresholds, then fantasy points against
# career year for that subset.
qb_min_comb_by_year = qb_by_year[
    (qb_by_year['Pass Rate'] > 55)
    & (qb_by_year['Pass Att'] > 20)
    & (qb_by_year['FantasyPoints'] > 20)
]
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.1)
sns.lmplot(x='Career Year', y='FantasyPoints', data=qb_min_comb_by_year)
Out[183]:
In [164]:
# Individual QB games ordered from lowest to highest fantasy score, and the
# 20 lowest-scoring games with their main stat columns.
qb_Fantasy = qb_games.sort_values('FantasyPoints')
qb_lowest = qb_Fantasy.head(20)[
    ['Name', 'Year', 'Career Year', 'Pass Att', 'Pass Yds', 'Pass TD',
     'Pass Int', 'Rush Yds', 'Rush TD', 'FantasyPoints']
]
In [170]:
# Quarterbacks behind the 20 lowest-scoring games (this relies on qb_Fantasy
# being sorted ascending, i.e. worst game first).
# Keep one row per quarterback: without drop_duplicates, a player with k games
# in the bottom 20 has every one of his game rows repeated k times by the
# merge below, which over-weights him in the regression plots that follow.
qb_lowest = qb_Fantasy.head(20)[['Name', 'Career Year']].drop_duplicates(subset='Name')
# Merge on Name with the qb_games dataframe. Both frames carry 'Career Year',
# so the merge yields 'Career Year_x' (from qb_games) and 'Career Year_y'
# (career year of that quarterback's lowest-scoring game).
qb_low_career = qb_games.merge(qb_lowest, on='Name')
print('The number of quarterbacks who generated the 20 worst scores are: %d'%(len(qb_low_career.Name.unique())))
qb_low_career.Name.unique()
Out[170]:
In [77]:
# Per-game fantasy points against career year for the quarterbacks in
# qb_low_career ('Career Year_x' is the merge-suffixed column).
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.1)
sns.lmplot(x='Career Year_x', y='FantasyPoints', data=qb_low_career)
Out[77]:
In [160]:
# Average every numeric stat per quarterback and career year.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError if the frame still contains non-numeric columns.
qb_career = qb_low_career.groupby(['Name', 'Career Year_x'], as_index=False).mean(numeric_only=True)
print(len(qb_career.Name.unique()))
sns.lmplot(y='FantasyPoints', x='Career Year_x', data=qb_career)
Out[160]:
In [163]:
# Individual QB games ordered from highest to lowest fantasy score, and the
# 20 highest-scoring games with their main stat columns.
# Note: this rebinds qb_Fantasy to a descending sort.
qb_Fantasy = qb_games.sort_values('FantasyPoints', ascending=False)
qb_top_games = qb_Fantasy.head(20)[
    ['Name', 'Year', 'Career Year', 'Pass Att', 'Pass Yds', 'Pass TD',
     'Pass Int', 'Rush Yds', 'Rush TD', 'FantasyPoints']
]
In [43]:
# Individual games with 'Pass Att' > 20, 'Pass Rate' > 90 and more than 20
# fantasy points; plot their score against career year.
qb_pass_att = qb_games[
    (qb_games['Pass Att'] > 20)
    & (qb_games['Pass Rate'] > 90)
    & (qb_games['FantasyPoints'] > 20)
]
print(type(qb_pass_att))
sns.lmplot(y='FantasyPoints', x='Career Year', data=qb_pass_att)
Out[43]:
In [ ]:
# Regression plot of per-row running back fantasy points against 'Rush Att'
# (presumably rushing attempts - confirm against the rb_games.csv columns).
sns.lmplot(x='Rush Att', y='FantasyPoints', data=rb_games)
In [ ]:
# Regression plot of per-row wide receiver fantasy points against career year.
sns.lmplot(x='Career Year', y='FantasyPoints', data=wr_games)