In [1]:
##
# Setup -- import the modules we want and set up inline plotting
#
from __future__ import print_function
import datetime
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Bigger fonts and figures for the demo
matplotlib.rcParams.update({
'font.size': 14,
'figure.figsize':(10.0, 8.0),
'axes.formatter.useoffset':False })
# Better data frame display for the demo
pd.set_option('expand_frame_repr', True)
pd.set_option('max_rows', 18)
pd.set_option('max_colwidth', 14)
pd.set_option('precision',2)
It's the total 2014 stats for people who started in NFL games last year. Thanks to TeamRankings.com. We're missing the count of safeties but hopefully they're so infrequent they don't affect rankings.
In [2]:
##
# Player data and the season schedule (for byes)
import os
player_stats_file = os.path.join('data', 'nfl_player_stats_season2014.csv')
season2015_file = os.path.join('data', 'nfl_season2015.csv')
prior_seasons_file = os.path.join('data', 'nfl_season2008to2014.csv')
stats = pd.read_csv(player_stats_file)
season2015 = pd.read_csv(season2015_file)
prior_seasons = pd.read_csv(prior_seasons_file) # Don't delete this, we need it later
stats.columns
Out[2]:
Team
- the full team name, exactly the same as in the other datasetsplayer
- the player name, plus team totals (TOTAL
) and opponent totals (OPPONENT TOTAL
)pos
- abbreviation for positions, such as QB
=quarterback, K
=kickerstarts
- number of starts in the 2014 seasonfumblesLost
- fumbles lost to the opposing team in 2014fumblesRecoveredTD
- fumbles recovered for a touchdown in 2014twoPt
- two point conversionpassingATT
- attempted passes in 2014passingCOMP
- completed passes in 2014passingINT
- intercepted passes in 2014passingTD
- passing touchdowns in 2014passingYDS
- passing yards in 2014receivingREC
- receptions in 2014receivingTD
- touchdowns made off of a reception in 2014receivingYDS
- receiving yards in 2014rushingATT
- rushing attempts in 2014rushingTD
- rushing touchdowns in 2014rushingYDS
- rushing yards in 2014kicking_extraPt
- extra points made in 2014kicking_FGge50A
- field golds $\ge$ 50 yards attemptedkicking_FGge50M
- field golds $\ge$ 50 yards madekicking_FGlt50A
- field golds $\lt$ 50 yards attemptedkicking_FGlt50M
- field golds $\lt$ yards madedefenseF
- fumbles forced in 2014defenseSCK
- sacks in 2014defenseTOTAL
- tackles in 2014defenseFumblesRecovered
- fumbles recovered in 2014pointreturnsFC
- fair catches on point returns in 2014pointreturnsRETURNS
- returns made on point returns in 2014pointreturnsTD
- point returns for a touchdown in 2014interceptionsINT
- interceptions in 2014interceptionsTD
- interceptions for a touchdown in 2014interceptionsYDS
- yards gained on interceptions in 2014kickreturnsRETURNS
- kick returns in 2014kickreturnsTD
- kick returns returned for a touchdown in 2014kickreturnsYDS
- yards gained during kick returns in 2014In Fantasy Football, you choose a defensive team, not individual players.
The dataset we have has rows in the column player
as 'TOTAL' and 'OPPONENT TOTAL' for total defensive stats, except for safeties (which I couldn't get easily - but which also are so rare that they shoudn't affect rankings too badly) and fumbles recovered for a touchdown (fumblesRecoveredTD
) which were from a separate dataset and not added in.
We need to aggregate fumblesRecoveredTD
over the individual players to get a score for the defense.
In [3]:
byteam_fumbleRecoveries = stats.groupby('Team').fumblesRecoveredTD.agg({'totalFumblesRecoveredTD': 'sum'}).reset_index()
byteam_fumbleRecoveries.describe()
Out[3]:
In [4]:
stats[stats.Player == 'TOTAL'][[
'Player', 'Team',
'interceptionsINT', 'interceptionsTD',
'pointreturnsTD', 'kickreturnsTD', 'fumblesRecoveredTD',
'defenseSCK', 'defenseF']]
Out[4]:
byteam_fumbleRecoveries
stats back into the original stats framebyteam_fumbleRecoveries
data frame. We will merge on that name plus the team to select the correct row.byteam_fumbleRecoveries
back into the stats
data frame, matching on the same team name (and with player
as the TOTAL
).totalFumblesRecoveredTD
(which we made for the aggregation).
In [5]:
# 1. Add the 'player' column with value 'TOTAL'
byteam_fumbleRecoveries['Player'] = 'TOTAL'
# 2. Merge (left join) to add the 'fumblesRecoveredTD' values to the 'stats' data frame.
stats = stats.merge(byteam_fumbleRecoveries, on=['Team', 'Player'], how='left')
# 3. Coalesce the two columns together into the original column
# (when I am assigning to a subset of rows I have to use the '.ix' accessor.
# The other one -- just [] -- will return a copy of the column and so
# the assignment will take place on the copy, not the original.)
stats.ix[stats.fumblesRecoveredTD.isnull(), 'fumblesRecoveredTD'] = stats[
stats.fumblesRecoveredTD.isnull()].totalFumblesRecoveredTD
# 4. Delete the column totalFumblesRecoveredTD.
del stats['totalFumblesRecoveredTD']
# Show the result
stats[stats.Player=='TOTAL'][['Team'] + [c for c in stats.columns if c.endswith('TD')]]
Out[5]:
For standard fantasy, you get 9 players on your starting roster and 6 players on the bench. The defensive players are rolled into one (we just did that for the last relevant type of scoring play) and we don't care about the other positions.
The parenthesized abbreviations are the corresponding value in the pos
column in this dataset:
QB
):1RB
):2WR
):2TE
):1K
):1First, let's add a value 'DEFENSE'
as the position when the player is named 'TOTAL'
. Then we'll delete all of the players who aren't the kinds we can use.
In [6]:
# These are the positions
stats.pos.unique()
Out[6]:
In [7]:
# 1. Add a value 'DEFENSE' as the position when the player is named 'TOTAL'
stats.ix[stats.Player == 'TOTAL', 'pos'] = 'DEFENSE'
# 2. Delete players that aren't the types used in Fantasy
stats = stats[stats.pos.isin(('QB', 'RB', 'WR', 'TE', 'K', 'DEFENSE'))]
print(stats.shape)
stats.head(6)
Out[7]:
The scoring shown in the table below uses the NFL fantasy standard rules. The fixed width
denotes the corresponding column name in the dataset.
Offense | Kicking | Defense |
for Touchdown: +6 `fumblesRecoveredTD` `twoPt` `fumblesLost` |
Point Attempts Made
|
|
---|---|---|
We have the game-by-game data from the 2014 season in the dataset prior_seasons
. Since all we have is aggregates everywhere else, there's no shame in just aggregating here too.
prior_seasons
data frame its respective Fantasy points according to the chart for Points Allowed, and sum them. This will give us one row per team.stats
data frame the same way as we merged fumblesRecoveredTD
player
column with value TOTAL
In [8]:
# 0. Function that will be used in (1) to assign points to each game in a season
def fantasy_points(points_allowed):
"""Calculate the fantasy score for the season, given the points allowed.
Points: (0) (1-6) (7-13) (14-20) (21-27) (28-34) (35+)
+10 +7 +4 +1 +0 -1 -4
"""
points = 10 * (points_allowed == 0).sum()
points += 7 * points_allowed.between(1,6).sum()
points += 4 * points_allowed.between(7,13).sum()
points += 1 * points_allowed.between(14,20).sum()
points += 0 * points_allowed.between(21,27).sum()
points += -1 * points_allowed.between(28,34).sum()
points += -4 * (points_allowed >= 35).sum()
return points
# 1. Assign each game from 2014 in the `prior_seasons` data frame its respective Fantasy points
defense_fantasy_points = prior_seasons[(prior_seasons.Season == 2014) & (prior_seasons.Week <= 17)
].groupby('Team').PointsAllowed.agg({'fantasyPA' :fantasy_points})
# reset the index (the groupby() made the 'Team' column into the index...get it out)
defense_fantasy_points.reset_index(inplace=True)
# 2 Merge it into the main 'stats' data frame
# 2a -- Add the 'player' column with value 'TOTAL'
defense_fantasy_points['Player'] = 'TOTAL'
# 2b -- Merge the 'defense_fantasy_points' into the 'stats' data frame.
# This will add the column 'fantasyPA' to 'stats'
if 'fantasyPA' in stats.columns:
del stats['fantasyPA'] # in case people run this cell more than once
stats = stats.merge(defense_fantasy_points, on=['Team', 'Player'], how='left')
stats[stats.fantasyPA.notnull()][['Team', 'fantasyPA']].head()
Out[8]:
In [9]:
# 1. Fill all of the null entries with zero
stats = stats.fillna(0)
# 2. Add `FantasyPtsTotal` to the dataset, set to zero
stats['FantasyPtsTotal'] = 0
# 3. Go through each column separately and add the correct points from each one
### --------------------------------------------- Passing ----- ###
## passingYDS -- Yards: +1 / 25 yds
stats.ix[:, 'FantasyPtsTotal'] += 1 * stats.passingYDS % 25
## passingTD -- Touchdowns: +4
stats.ix[:, 'FantasyPtsTotal'] += 4 * stats.passingTD
## passingINT -- Interceptions: -2
stats.ix[:, 'FantasyPtsTotal'] -= 2 * stats.passingINT
### --------------------------------------------- Rushing ----- ###
## rushingYDS -- Yards: +1 / 10 yds
stats.ix[:, 'FantasyPtsTotal'] += 1 * stats.rushingYDS % 10
## rushingTD -- Touchdowns: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.rushingTD
### ------------------------------------------- Receiving ----- ###
## receivingYDS -- Yards: +1 / 10 yds
stats.ix[:, 'FantasyPtsTotal'] += 1 * stats.receivingYDS % 10
## receivingTD -- Touchdowns: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.receivingTD
### --------------------------------------------- General ----- ###
## fumblesRecoveredTD -- Fumbles recovered for Touchdown: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.fumblesRecoveredTD
## twoPt -- 2-Point Conversions: +2
stats.ix[:, 'FantasyPtsTotal'] += 2 * stats.twoPt
## fumblesLost -- Fumbles Lost: -2
stats.ix[:, 'FantasyPtsTotal'] -= 2 * stats.fumblesLost
### --------------------------------------------- Kicking ----- ###
## kicking_extraPt -- Point Attempts Made: +1
stats.ix[:, 'FantasyPtsTotal'] += 1 * stats.kicking_extraPt
## kicking_FGlt50M -- Field Goals made at 0-49 yds: +3
stats.ix[:, 'FantasyPtsTotal'] += 3 * stats.kicking_FGlt50M
## kicking_FGge50M -- Field Goals made at 50+ yds: +5
stats.ix[:, 'FantasyPtsTotal'] += 3 * stats.kicking_FGge50M
### --------------------------------------------- Defense ----- ###
## defenseSCK -- Sacks: +1
stats.ix[:, 'FantasyPtsTotal'] += 1 * stats.defenseSCK
## interceptionsINT -- Interceptions: +2
stats.ix[:, 'FantasyPtsTotal'] += 2 * stats.interceptionsINT
## defenseF -- Fumbles Recovered: +2
stats.ix[:, 'FantasyPtsTotal'] += 2 * stats.defenseF
## interceptionsTD -- Defensive Touchdowns: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.interceptionsTD
## fumblesRecoveredTD -- Defensive Touchdowns: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.fumblesRecoveredTD
## kickreturnsTD -- Defensive Touchdowns: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.kickreturnsTD
## pointreturnsTD -- Defensive Touchdowns: +6
stats.ix[:, 'FantasyPtsTotal'] += 6 * stats.pointreturnsTD
## ............................ Defense - Points Allowed ..... ###
stats.ix[:, 'FantasyPtsTotal'] += stats.fantasyPA
stats.head(3)
Out[9]:
In [10]:
## Take a look at what we made:
# the top 10 people in each position.
#
for position, points in stats.sort(columns='FantasyPtsTotal', ascending=False)[
['Player', 'pos', 'Team', 'FantasyPtsTotal']].groupby('pos'):
print(" ".join(('#', '-' * 50, position, '-' * 3)))
print("Points\tTeam\t\t\tPlayer")
for row in points.head(10).apply(
lambda x: '{}\t{:<20}\t{}'.format(int(x.FantasyPtsTotal), x.Team, x.Player),
axis=1):
print(row)
The stats are for the previous season. Use the roster data ('data/nfl_rosters2015.csv
') we have to map the player to the correct team, and add the bye week.
data/nfl_rosters2015.csv
' and merge it with stats
to correctly map players to their 2015 team.After we merge, we will have pairs of columns that need to be reconciled:
Team
' and 'Team2014
' -- the player 'Defense' doesn't actually exist so after the merge the 'Team
' will be null for 'Defense'...make sure the 'Team
' column is populated with the 2014 valuePos
' (for 2015) and 'pos
' (for 2014). And then add the Bye Week for each player
nfl_season2015.csv
' to add a column Bye
to indicate the player's bye weekdata/nfl_top_100s.csv
' and add a column indicating the player's rank in the 2015 list
(it is the NFL one the players vote in. ESPN also has one.)
In [11]:
# Rename the player 'TOTAL' to the player 'Defense' because that's what it is.
stats.ix[stats.Player == 'TOTAL', 'Player'] = 'Defense'
# Read in 'data/nfl_rosters2015.csv' and merge it with stats
stats.columns = ['Team2014' if c == 'Team' else c for c in stats.columns]
rosters2015 = pd.read_csv(os.path.join('data', 'nfl_rosters2015.csv'))
rosters2015.head()
full_stats = stats.merge(rosters2015, on='Player', how='outer')
print(full_stats.shape)
print('Total players with fantasy numbers:', full_stats.FantasyPtsTotal.count())
print('Total players on 2015 rosters with fantasy numbers:',
((full_stats.Team.notnull()) & (full_stats.FantasyPtsTotal.notnull())).sum())
# Reconcile 'Team' and 'Team2014' for the player 'Defense'
full_stats.ix[full_stats.Player == 'Defense', 'Team'
] = full_stats[full_stats.Player == 'Defense'].Team2014
# Reconcile 'Pos' (for 2015) and 'pos' (for 2014).
full_stats.ix[full_stats.Pos.isnull(), 'Pos'] = full_stats[full_stats.Pos.isnull()].pos
del full_stats['pos']
# Read in 'nfl_top_100s.csv' and add a column indicating the player's rank
# in the 2015 list that the NFL players vote on.
nfl100 = pd.read_csv(os.path.join('data', 'nfl_top_100s.csv'))
nfl100 = nfl100[nfl100.year == 2015]
nfl100.columns = nfl100.columns.str.capitalize()
nfl100.columns = ['Top100Rank' if c == 'Rank' else c for c in nfl100.columns]
full_stats = full_stats.merge(
nfl100[['Player', 'Team', 'Top100Rank']],
on=['Player', 'Team'],
how='outer')
# Peek at the data
full_stats[full_stats.FantasyPtsTotal.notnull()][
['Team', 'Pos', 'Player', 'FantasyPtsTotal', 'Top100Rank']
].sort('Top100Rank').head()
Out[11]:
In [12]:
##
# Show teams by bye week
season2015 = pd.read_csv("data/nfl_season2015.csv")
all_teams = set(season2015.homeTeam.unique())
byes = dict(ByeWeek=[], Team=[])
for wk, dataset in season2015.groupby('week'):
bye_teams = all_teams.difference(dataset.homeTeam).difference(dataset.awayTeam)
byes['ByeWeek'].extend([wk] * len(bye_teams))
byes['Team'].extend(bye_teams)
byes = pd.DataFrame(byes)
for wk, dat in byes.groupby('ByeWeek'):
print('------------------ week {} '.format(wk))
print("\n".join(dat.Team))
In [13]:
# Add the player's 2015 bye week
if 'ByeWeek' in full_stats: # This part is in case someone runs the cell twice
del full_stats['ByeWeek']
full_stats = full_stats.merge(byes, on='Team', how='left')
full_stats[full_stats.FantasyPtsTotal.notnull()][
['Team', 'Pos', 'Player', 'ByeWeek', 'FantasyPtsTotal', 'Top100Rank']
].sort('Top100Rank').head()
Out[13]:
The player's 2015 position is 'Pos
'. Delete players who can't be on the fantasy roster.
Team
of the defense rows to be the same as in 2014First, let's add a value 'DEFENSE' as the position when the player is named 'TOTAL'. Then we'll delete all of the players who aren't the kinds we can use.
In [14]:
# Show the unique positions
full_stats.Pos.unique()
Out[14]:
In [15]:
# Change anything that starts with an 'WR' to 'WR' and anything that starts with 'RB' to 'RB'
full_stats.ix[full_stats.Pos.notnull() & full_stats.Pos.str.startswith('WR'), 'Pos'] = 'WR'
full_stats.ix[full_stats.Pos.notnull() & full_stats.Pos.str.startswith('RB'), 'Pos'] = 'RB'
# And then keep only the players we will use
full_stats = full_stats[full_stats.Pos.isin(('QB', 'RB', 'WR', 'TE', 'K', 'DEFENSE'))]
# And drop people who aren't on a team in 2015
full_stats = full_stats[full_stats.Team.notnull()]
full_stats.Pos.value_counts()
Out[15]:
In [16]:
## Take a look at what we made:
# the top 10 people in each position.
#
for position, points in full_stats.sort(columns='FantasyPtsTotal', ascending=False)[
['Player', 'Pos', 'Team', 'ByeWeek', 'Top100Rank', 'FantasyPtsTotal']].groupby('Pos'):
print(" ".join(('#', '-' * 50, position, '-' * 3)))
print("Points\tTeam\t\t\tBye NFL100\tPlayer")
for row in points.head(10).apply(
lambda x: '{}\t{:<20}\t{}\t{}\t{}'.format(
int(x.FantasyPtsTotal),
x.Team,
int(x.ByeWeek),
'--' if np.isnan(x.Top100Rank) else int(x.Top100Rank),
x.Player),
axis=1):
print(row)
The obvious advice is don't pick a backup player that's got the same bye week as your primary one.
Next, picking order could be
In [17]:
# Add the difference in total points between each player and one below,
# and each player and 10 below (worst case draft pick)
full_stats = full_stats[full_stats.Team.notnull()]
full_stats = full_stats.sort(['Pos', 'FantasyPtsTotal']) # Don't forget to sort
full_stats['FantasyPtsDelta'] = full_stats.groupby('Pos').FantasyPtsTotal.diff(1)
full_stats['FantasyPtsDelta10'] = full_stats.groupby('Pos').FantasyPtsTotal.diff(10)
full_stats['FantasyPtsBelowBest'] = full_stats.groupby('Pos').FantasyPtsTotal.apply(
lambda x: max(x) - x)
full_stats = full_stats.sort(['Pos', 'FantasyPtsTotal'], ascending=['True', 'False'])
full_stats.to_csv(os.path.join('excel_files', 'fantasy_points.xls'), index=False)
The script below prints out the top 15 players in each slot.
In [18]:
for position, points in full_stats.sort(columns='FantasyPtsTotal', ascending=False)[
['Player', 'Pos', 'Team', 'ByeWeek',
'Top100Rank',
'FantasyPtsTotal', 'FantasyPtsDelta', 'FantasyPtsDelta10']].groupby('Pos'):
print(" ".join(('#', '-' * 70, position, '-' * 3)))
print("Points\t(d1)\t(d10)\tTeam\t\t\tBye NFL100\tPlayer")
nrows = min(15, points.shape[0])
for row in points.head(nrows).apply(
lambda x: '{}\t{}\t{}\t{:<20}\t{}\t{}\t{}'.format(
'--' if np.isnan(x.FantasyPtsTotal) else int(x.FantasyPtsTotal),
'--' if np.isnan(x.FantasyPtsDelta) else int(x.FantasyPtsDelta),
'--' if np.isnan(x.FantasyPtsDelta10) else int(x.FantasyPtsDelta10),
x.Team,
int(x.ByeWeek),
'--' if np.isnan(x.Top100Rank) else int(x.Top100Rank),
x.Player),
axis=1):
print(row)
In [19]:
fig, axs = plt.subplots(3, 2, sharex=True)
positions = ('DEFENSE', 'WR', 'QB', 'RB', 'K', 'TE')
for (ax, position) in zip(axs.flatten(), positions):
p = full_stats[full_stats.Pos == position]
p = p[p.FantasyPtsTotal.notnull()].sort('FantasyPtsTotal', ascending=False).head(15)
p = p.sort('FantasyPtsTotal')
pos = [0.5 + r for r in range(p.shape[0])]
ax.barh(pos, p.FantasyPtsTotal, align='center')
ax.set_yticks(pos)
if position == 'DEFENSE':
ax.set_yticklabels(list(p.Team), size='x-small')
else:
ax.set_yticklabels(list(p.Player), size='x-small')
ax.set_title(position)
for s in ('top', 'right', 'bottom', 'left'):
ax.spines[s].set_visible(False)
if ax.is_last_row():
ax.set_xlabel('Fantasy Points, 2014')
plt.show()
In [20]:
fig, axs = plt.subplots(3, 2, sharex=True)
for (ax, position) in zip(axs.flatten(), positions):
p = full_stats[full_stats.Pos == position]
p = p[p.FantasyPtsBelowBest.notnull()].sort('FantasyPtsTotal', ascending=False).head(20)
p = p.sort('FantasyPtsTotal')
pos = [0.5 + r for r in range(p.shape[0])]
ax.barh(pos, p.FantasyPtsBelowBest, align='center')
ax.set_yticks(pos)
if position == 'DEFENSE':
ax.set_yticklabels(list(p.Team), size='x-small')
else:
ax.set_yticklabels(list(p.Player), size='x-small')
ax.set_title(position)
for s in ('top', 'right', 'bottom', 'left'):
ax.spines[s].set_visible(False)
if ax.is_last_row():
ax.set_xlabel('Fantasy Points below Top Player, 2014')
plt.show()