notebook.community

Edit and run



In [1]:

    
import pandas as pd
from pylab import *
%matplotlib inline
import matplotlib.colors as mplc

from scrapenhl2.scrape import team_info, teams, players, autoupdate
from scrapenhl2.manipulate import manipulate as manip, add_onice_players as onice
# autoupdate.autoupdate()  # uncomment this to update data

The purpose of this script is to generate shot counts for skaters after faceoffs.

For example, CA after 5 and 10 seconds for Nicklas Backstrom after defensive-zone faceoff wins.



In [2]:

    
team = team_info.team_as_id('WSH')
season = 2017
pbp = teams.get_team_pbp(season, team)
toi = teams.get_team_toi(season, team)

This is how we'll approach the problem:

Filter TOI to 5v5 and go wide to long on players
Isolate faceoffs from the PBP
- Get faceoff zones
- Filter for 5v5
- Get faceoff zones
Join faceoffs to TOI and code how many seconds after a draw that sec is
- Join faceoffs, have an indicator of last faceoff
- Fill forward on last faceoff time
- Calculate difference between time and last faceoff time; filter for time <= 15
Isolate shots from the PBP
- Join to TOI
- Filter for 5v5
- Join to first dataframe on game, player, and time
- Group by player, time since draw, draw zone and type, and which team took the shot, and sum
Repeat for each team (not done here)

5v5 TOI



In [3]:

    
# Filter to 5v5
toi = manip.filter_for_five_on_five(toi) \
    [['Game', 'Team1', 'Team2', 'Team3', 'Team4', 'Team5', 'Time']] \
    .drop_duplicates()  # sometimes these appear
toi = toi.melt(id_vars=['Time', 'Game'], value_name='PlayerID') \
    .drop('variable', axis=1)
toi.head()

Link times to time since last draw and faceoff zones



In [4]:

    
# Get faceoffs
draws = manip.filter_for_event_types(pbp, 'Faceoff')

# Select only needed columns
draws = draws[['Game', 'Team', 'Period', 'MinSec', 'X', 'Y']]

# Convert period and mm:ss to time elapsed in game
draws = onice.add_times_to_file(draws, periodcol='Period', timecol='MinSec', time_format='elapsed')
draws = draws.drop({'Period', 'MinSec'}, axis=1).assign(Season=season)

# Get zones
directions = manip.get_directions_for_xy_for_season(season, team)
draws = manip.infer_zones_for_faceoffs(draws, directions, 'X', 'Y', '_Secs', focus_team='WSH')
draws = draws.drop({'X', 'Y'}, axis=1)

# Simplify zone notation to just N, O, D
draws.loc[:, 'Zone'] = draws.EventLoc.str.slice(0, 1)
draws = draws.drop('EventLoc', axis=1)

# Combine with team column to change N, D, O, to NW, NL, DW, DL, OW, OL
draws.loc[:, 'WL'] = draws.Team.apply(lambda x: 'W' if x == team else 'L')
draws.loc[:, 'ZS'] = draws.Zone + draws.WL
draws = draws.drop({'WL', 'Zone', 'Team'}, axis=1)

draws.head()



In [5]:

    
# Join to TOI
draws_joined = draws.rename(columns={'_Secs': 'Time'}) \
    .merge(toi, how='right', on=['Game', 'Time'])
    
# Add last faceoff indicator and fill forward
draws_joined = draws_joined.sort_values(['Game', 'PlayerID', 'Time'])
draws_joined.loc[pd.notnull(draws_joined.ZS), 'LastDraw'] = draws_joined.Time
draws_joined.loc[:, 'LastDraw'] = draws_joined[['Game', 'PlayerID', 'LastDraw']] \
    .groupby(['Game', 'PlayerID']).ffill()
draws_joined.loc[:, 'ZS'] = draws_joined[['Game', 'PlayerID', 'ZS']] \
    .groupby(['Game', 'PlayerID']).ffill()

draws_joined.loc[:, 'TimeSinceLastDraw'] = draws_joined.Time - draws_joined.LastDraw

draws_joined = draws_joined[pd.notnull(draws_joined.TimeSinceLastDraw)] \
    .query("TimeSinceLastDraw > 0")

draws_joined.head()









    Out[5]:







  
    
      
      Game
      Time
      Season
      ZS
      PlayerID
      LastDraw
      TimeSinceLastDraw
    
  
  
    
      9410
      20008
      369
      NaN
      DW
      8468498.0
      368.0
      1.0
    
    
      9415
      20008
      370
      NaN
      DW
      8468498.0
      368.0
      2.0
    
    
      9420
      20008
      371
      NaN
      DW
      8468498.0
      368.0
      3.0
    
    
      9425
      20008
      372
      NaN
      DW
      8468498.0
      368.0
      4.0
    
    
      9430
      20008
      373
      NaN
      DW
      8468498.0
      368.0
      5.0

Shot counts



In [6]:

    
# Get shot attempts
cfca = manip.filter_for_corsi(pbp)

# Select only needed columns
cfca = cfca[['Game', 'Team', 'Period', 'MinSec']]

# Convert period and mm:ss to time elapsed in game
cfca = onice.add_times_to_file(cfca, periodcol='Period', timecol='MinSec', time_format='elapsed')
cfca = cfca.drop({'Period', 'MinSec'}, axis=1).rename(columns={'_Secs': 'Time'})

# Add on-ice players
cfca = cfca.merge(toi, how='left', on=['Game', 'Time'])

# Change Team to CF or CA
cfca.loc[:, 'Team'] = cfca.Team.apply(lambda x: 'CF' if x == team else 'CA')

cfca.head()



In [7]:

    
# Join to faceoffs df
joined = draws_joined.merge(cfca, how='left', on=['Game', 'Time', 'PlayerID'])

# Get counts of time after each draw
time_counts = joined[['ZS', 'PlayerID', 'TimeSinceLastDraw']] \
    .assign(TOI=1) \
    .groupby(['ZS', 'PlayerID', 'TimeSinceLastDraw'], as_index=False) \
    .count()

# Get counts of shots
shot_counts = joined[['ZS', 'PlayerID', 'TimeSinceLastDraw', 'Team']] \
    .dropna() \
    .assign(Count=1) \
    .groupby(['ZS', 'PlayerID', 'TimeSinceLastDraw', 'Team'], as_index=False) \
    .count() \
    .pivot_table(index=['ZS', 'PlayerID', 'TimeSinceLastDraw'], columns='Team', values='Count') \
    .reset_index()
shot_counts.loc[:, 'CA'] = shot_counts.CA.fillna(0)
shot_counts.loc[:, 'CF'] = shot_counts.CF.fillna(0)

alljoined = time_counts \
    .merge(shot_counts, how='left', on=['ZS', 'PlayerID', 'TimeSinceLastDraw']) \
    .fillna(0)

alljoined.head()









    Out[7]:







  
    
      
      ZS
      PlayerID
      TimeSinceLastDraw
      TOI
      CA
      CF
    
  
  
    
      0
      DL
      8468498.0
      1.0
      87
      0.0
      0.0
    
    
      1
      DL
      8468498.0
      2.0
      87
      1.0
      0.0
    
    
      2
      DL
      8468498.0
      3.0
      85
      3.0
      0.0
    
    
      3
      DL
      8468498.0
      4.0
      83
      1.0
      0.0
    
    
      4
      DL
      8468498.0
      5.0
      84
      7.0
      0.0

Finally, we can replace Player IDs with names, export, and graph.



In [8]:

    
alljoined.loc[:, 'Player'] = players.playerlst_as_str(alljoined.PlayerID)
alljoined = alljoined.drop('PlayerID', axis=1)
alljoined.loc[:, 'CF60'] = alljoined.CF * 3600 / alljoined.TOI
alljoined.loc[:, 'CA60'] = alljoined.CA * 3600 / alljoined.TOI
alljoined.to_csv('time_since_last_draw_data.csv', index=False)
alljoined.head()









    Out[8]:







  
    
      
      ZS
      TimeSinceLastDraw
      TOI
      CA
      CF
      Player
      CF60
      CA60
    
  
  
    
      0
      DL
      1.0
      87
      0.0
      0.0
      Brooks Orpik
      0.0
      0.000000
    
    
      1
      DL
      2.0
      87
      1.0
      0.0
      Brooks Orpik
      0.0
      41.379310
    
    
      2
      DL
      3.0
      85
      3.0
      0.0
      Brooks Orpik
      0.0
      127.058824
    
    
      3
      DL
      4.0
      83
      1.0
      0.0
      Brooks Orpik
      0.0
      43.373494
    
    
      4
      DL
      5.0
      84
      7.0
      0.0
      Brooks Orpik
      0.0
      300.000000



In [9]:

    
comp_players = ('Nicklas Backstrom', 'Evgeny Kuznetsov', 'Lars Eller', 'Jay Beagle')

def plot_cumulative_shot_lines(df, zone, metric, ax, *comp_players):
    for p in comp_players:
        df1 = df.query('ZS == "{0:s}" & Player == "{1:s}"'.format(zone, p)).sort_values('TimeSinceLastDraw')
        df1.loc[:, 'TOI'] = df1.TOI.cumsum()
        df1.loc[:, metric] = df1[metric].cumsum()
        df1.loc[:, '{0:s}60'.format(metric)] = df1[metric] * 3600 / df1.TOI
        ax.plot(df1.TimeSinceLastDraw, df1['{0:s}60'.format(metric)], label=p)
    ax.set_title('Cumulative {0:s}60 after {1:s}'.format(metric, zone))

def plot_comparison(df, metric, *comp_players):
    fig, axes = subplots(2, 3, sharex=True, sharey=True, figsize=[12, 8])
    axes = axes.flatten()
    df2 = df.query('TimeSinceLastDraw <= 15')
    for i, zone in enumerate(['OW', 'OL', 'NW', 'NL', 'DW', 'DL']):
        if metric == 'N':
            plot_cumulative_ns(df2, zone, axes[i], *comp_players)
        else:
            plot_cumulative_shot_lines(df2, zone, metric, axes[i], *comp_players)
        legend(loc=1)
        
plot_comparison(alljoined, 'CF', *comp_players)









    



/Users/muneebalam/anaconda/lib/python3.6/site-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "



In [10]:

    
plot_comparison(alljoined, 'CA', *comp_players)









    



/Users/muneebalam/anaconda/lib/python3.6/site-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "



In [11]:

    
def plot_cumulative_ns(df, zone, ax, *comp_players):
    for p in comp_players:
        df1 = df.query('ZS == "{0:s}" & Player == "{1:s}"'.format(zone, p)).sort_values('TimeSinceLastDraw')
        ax.plot(df1.TimeSinceLastDraw, df1.TOI, label=p)
    ax.set_title('Post-faceoff time in {0:s}'.format(zone))
    
plot_comparison(alljoined, 'N', *comp_players)









    



/Users/muneebalam/anaconda/lib/python3.6/site-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "

It also might be interesting to take a look at what fraction of a player's 5v5 TOI is accounted for by the time after faceoffs.



In [12]:

    
# Find shift starts
shifts = toi.sort_values(['Game', 'PlayerID', 'Time'])

# If shift yields diff time in same game and player, then it's a shift start
shifts.loc[:, 'PrevT'] = shifts.Time.shift(1)
shifts.loc[:, 'PrevP'] = shifts.PlayerID.shift(1)
shifts.loc[:, 'PrevG'] = shifts.Game.shift(1)
shifts.loc[(shifts.PlayerID == shifts.PrevP) & (shifts.Game == shifts.PrevG) & (shifts.Time != shifts.PrevT + 1), 
           'ShiftIndex'] = 1
shifts.loc[:, 'ShiftIndex'] = shifts.ShiftIndex.fillna(0)
shifts.loc[:, 'ShiftIndex'] = shifts.ShiftIndex.cumsum()  

shifts = shifts.drop({'PrevT', 'PrevP', 'PrevG'}, axis=1)

shifts.head()



In [13]:

    
# Calculate amount of time at each point in shift
starttimes = shifts[['Time', 'ShiftIndex', 'PlayerID']] \
    .groupby(['ShiftIndex', 'PlayerID'], as_index=False).min() \
    .rename(columns={'Time': 'StartTime'})

shifts2 = shifts.merge(starttimes, how='left', on=['ShiftIndex', 'PlayerID'])
shifts2.loc[:, 'TimeSinceLastDraw'] = shifts2.Time - shifts2.StartTime + 1
counts = shifts2[['TimeSinceLastDraw', 'PlayerID']] \
    .assign(TOI=1) \
    .groupby(['TimeSinceLastDraw', 'PlayerID'], as_index=False) \
    .count() \
    .assign(ZS='Other')
    
counts.loc[:, 'Player'] = players.playerlst_as_str(counts.PlayerID)
counts = counts.drop('PlayerID', axis=1)
counts.head()









    Out[13]:







  
    
      
      TimeSinceLastDraw
      TOI
      ZS
      Player
    
  
  
    
      0
      1
      725
      Other
      Brooks Orpik
    
    
      1
      1
      589
      Other
      Alex Ovechkin
    
    
      2
      1
      501
      Other
      T.J. Oshie
    
    
      3
      1
      452
      Other
      Matt Niskanen
    
    
      4
      1
      323
      Other
      Taylor Chorney



In [14]:

    
# Add to original
alljoined2 = pd.concat([alljoined[['Player', 'ZS', 'TimeSinceLastDraw', 'TOI']], counts])
alljoined2 = alljoined2.sort_values(['Player', 'ZS', 'TimeSinceLastDraw'])

# Convert to percentages
totals = alljoined2.drop('ZS', axis=1) \
    .groupby(['Player', 'TimeSinceLastDraw'], as_index=False) \
    .sum() \
    .rename(columns={'TOI': 'TotalTOI'})
alljoined3 = alljoined2.merge(totals, how='left', on=['Player', 'TimeSinceLastDraw'])
alljoined3.loc[:, 'TOI'] = alljoined3.TOI / alljoined3.TotalTOI
alljoined3.head()









    Out[14]:







  
    
      
      Player
      TOI
      TimeSinceLastDraw
      ZS
      TotalTOI
    
  
  
    
      0
      Aaron Ness
      0.051948
      1.0
      DL
      231
    
    
      1
      Aaron Ness
      0.052174
      2.0
      DL
      230
    
    
      2
      Aaron Ness
      0.052174
      3.0
      DL
      230
    
    
      3
      Aaron Ness
      0.052632
      4.0
      DL
      228
    
    
      4
      Aaron Ness
      0.052863
      5.0
      DL
      227



In [15]:

    
def plot_stacked_area(df, p, ax, limit=20):
    zones = ['OW', 'OL', 'NW', 'NL', 'DW', 'DL'][::-1]  # for DZ at bottom
    
    # Set colors
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
    colors = [mplc.to_rgba(color_cycle[0], alpha=0.5), color_cycle[0], 
             mplc.to_rgba(color_cycle[1], alpha=0.5), color_cycle[1], 
             mplc.to_rgba(color_cycle[2], alpha=0.5), color_cycle[2]]
    
    
    struct = pd.DataFrame({'TimeSinceLastDraw': range(1, limit+1, 1)}).assign(Player=p)
    struct = struct.merge(df[['Player', 'TimeSinceLastDraw', 'ZS', 'TOI']], 
                          how='left', on=['Player', 'TimeSinceLastDraw'])
    struct = struct.pivot_table(index=['Player', 'TimeSinceLastDraw'], columns='ZS', values='TOI').reset_index()
    struct = struct.fillna(method='ffill')
    ax.stackplot(struct.TimeSinceLastDraw, [struct[zone] for zone in zones], labels=zones, colors=colors)
    ax.set_title('TOI into shift by 5v5 shift\nstart for {0:s}'.format(p))
    ax.set_xlabel('Seconds into shift')
    legend(loc=2)

def plot_tois(df, *comp_players):
    fig, axes = subplots(1, 4, sharex=True, sharey=True, figsize=[12, 4])
    axes = axes.flatten()
    for i, p in enumerate(comp_players):
        plot_stacked_area(df, p, axes[i])
    legend(loc=2, bbox_to_anchor=(1, 1))
    axes[0].set_ylabel('% of 5v5 shifts')

plot_tois(alljoined3, *comp_players)









    



/Users/muneebalam/anaconda/lib/python3.6/site-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "

	Time	Game	PlayerID
0	0	20008	8475200.0
1	1	20008	8475200.0
2	2	20008	8475200.0
3	3	20008	8475200.0
4	4	20008	8475200.0

	Game	_Secs	Season	ZS
0	20008	1	2017	NL
1	20008	330	2017	OL
2	20008	368	2017	DW
3	20008	386	2017	NW
4	20008	441	2017	DL

	Game	Time	Season	ZS	PlayerID	LastDraw	TimeSinceLastDraw
9410	20008	369	NaN	DW	8468498.0	368.0	1.0
9415	20008	370	NaN	DW	8468498.0	368.0	2.0
9420	20008	371	NaN	DW	8468498.0	368.0	3.0
9425	20008	372	NaN	DW	8468498.0	368.0	4.0
9430	20008	373	NaN	DW	8468498.0	368.0	5.0

	Game	Team	Time	PlayerID
0	20008	CF	46	8476344.0
1	20008	CF	46	8475758.0
2	20008	CF	46	8474291.0
3	20008	CF	46	8468498.0
4	20008	CF	46	8474590.0

	ZS	TimeSinceLastDraw	TOI	CA	Player	CA60
0	DL	1.0	87	0.0	Brooks Orpik	0.000000
1	DL	2.0	87	1.0	Brooks Orpik	41.379310
2	DL	3.0	85	3.0	Brooks Orpik	127.058824
3	DL	4.0	83	1.0	Brooks Orpik	43.373494
4	DL	5.0	84	7.0	Brooks Orpik	300.000000

	Time	Game	PlayerID
281384	41	20008	8468498.0
281385	42	20008	8468498.0
281386	43	20008	8468498.0
281387	44	20008	8468498.0
281388	45	20008	8468498.0

	TimeSinceLastDraw	TOI	ZS	Player
0	1	725	Other	Brooks Orpik
1	1	589	Other	Alex Ovechkin
2	1	501	Other	T.J. Oshie
3	1	452	Other	Matt Niskanen
4	1	323	Other	Taylor Chorney

	Player	TOI	TimeSinceLastDraw	ZS	TotalTOI
0	Aaron Ness	0.051948	1.0	DL	231
1	Aaron Ness	0.052174	2.0	DL	230
2	Aaron Ness	0.052174	3.0	DL	230
3	Aaron Ness	0.052632	4.0	DL	228
4	Aaron Ness	0.052863	5.0	DL	227