In [1]:
from ggplot import *
import pandas as pd
import numpy as np
In [ ]:
# Ask IPython to render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
In [2]:
# Read the pitch CSV and immediately narrow it to the columns used below.
df = pd.read_csv("./baseball-pitches-clean.csv")[[
    'pitch_time',
    'inning',
    'pitcher_name',
    'hitter_name',
    'pitch_type',
    'px',
    'pz',
    'pitch_name',
    'start_speed',
    'end_speed',
    'type_confidence',
]]
df.head()
Out[2]:
In [3]:
# game_date is the first 10 characters of the pitch_time string.
df['game_date'] = df['pitch_time'].str.slice(0, 10)

# Running pitch number within each (pitcher_name, game_date) group:
# give every row a 1, then take the cumulative sum inside each group.
df['pitch_count'] = (
    df.assign(pitch_count=1)
    .groupby(["pitcher_name", "game_date"])['pitch_count']
    .cumsum()
)
In [4]:
# First rows of the grouping keys next to the derived pitch_count.
df.loc[:, ["pitcher_name", "game_date", "pitch_count"]].head()
Out[4]:
In [5]:
# Last rows of the grouping keys next to the derived pitch_count.
df.loc[:, ["pitcher_name", "game_date", "pitch_count"]].tail()
Out[5]:
In [7]:
# Point layer: start_speed (y) against pitch_count (x) for every row of df.
(
    ggplot(aes(x='pitch_count', y='start_speed'), data=df)
    + geom_point()
)
Out[7]:
In [6]:
# Same axes as the scatter, but drawn with stat_smooth() at its default settings.
(
    ggplot(aes(x='pitch_count', y='start_speed'), data=df)
    + stat_smooth()
)
Out[6]:
In [9]:
# Smoothed start_speed vs. pitch_count, this time passing span=0.3 to the smoother.
(
    ggplot(aes(x='pitch_count', y='start_speed'), data=df)
    + stat_smooth(span=0.3)
)
Out[9]:
In [7]:
# Smoothed start_speed vs. pitch_count using method='lm' instead of the default.
(
    ggplot(aes(x='pitch_count', y='start_speed'), data=df)
    + stat_smooth(method='lm')
)
Out[7]:
In [8]:
# One smoothed curve per pitch_name (mapped to color), with se=False.
(
    ggplot(aes(x='pitch_count', y='start_speed', color='pitch_name'), data=df)
    + stat_smooth(se=False)
)
Out[8]:
In [12]:
# Row counts per pitcher_name; show the first 10 entries of value_counts().
df["pitcher_name"].value_counts().head(10)
Out[12]:
In [9]:
# Rows for a single pitcher.
pitcher = df.loc[df["pitcher_name"] == "David Price"]

# Boolean Series indexed by pitch_name: True where that pitch appears more than 10 times.
pitches = pitcher["pitch_name"].value_counts() > 10

# Keep only rows whose pitch_name passed the count threshold above.
pitcher = pitcher.loc[pitcher["pitch_name"].isin(pitches[pitches].index)]
pitcher.head()
Out[9]:
In [14]:
# Jittered points of start_speed by inning for the selected pitcher.
(
    ggplot(aes(x='inning', y='start_speed'), data=pitcher)
    + geom_jitter()
)
Out[14]:
In [15]:
# Jittered start_speed by inning, with pitch_name mapped to color.
(
    ggplot(aes(x='inning', y='start_speed', color='pitch_name'), data=pitcher)
    + geom_jitter()
)
Out[15]:
In [16]:
# Jittered start_speed by inning, split into one facet per pitch_name.
(
    ggplot(aes(x='inning', y='start_speed'), data=pitcher)
    + geom_jitter()
    + facet_wrap("pitch_name")
)
Out[16]:
In [10]:
# Faceted jitter plot by pitch_name, overlaid with a blue stat_smooth(method='lm') layer.
(
    ggplot(aes(x='inning', y='start_speed'), data=pitcher)
    + geom_jitter()
    + stat_smooth(method='lm', color='blue')
    + facet_wrap("pitch_name")
)
Out[10]:
In [18]:
# Jittered start_speed against pitch_count for the selected pitcher, colored by pitch_name.
(
    ggplot(aes(x='pitch_count', y='start_speed', color='pitch_name'), data=pitcher)
    + geom_jitter()
)
Out[18]:
In [11]:
# Per-pitch_name smoothed curves (se=False, size=8) drawn with faint points (alpha=0.2).
(
    ggplot(aes(x='pitch_count', y='start_speed', color='pitch_name'), data=pitcher)
    + stat_smooth(se=False, size=8)
    + geom_point(alpha=0.2)
)
Out[11]:
In [12]:
# Rebind `pitcher` to a different pitcher's rows.
pitcher = df.loc[df["pitcher_name"] == "Justin Verlander"]

# Boolean Series indexed by pitch_name: True where that pitch appears more than 10 times.
pitches = pitcher["pitch_name"].value_counts() > 10

# Keep only rows whose pitch_name passed the count threshold above.
pitcher = pitcher.loc[pitcher["pitch_name"].isin(pitches[pitches].index)]
pitcher.head()
Out[12]:
In [21]:
# Per-pitch_name smoothed curves (se=False, size=8) drawn with faint points (alpha=0.2).
# Author's takeaway from the plot: he actually throws harder as the game goes on!
(
    ggplot(aes(x='pitch_count', y='start_speed', color='pitch_name'), data=pitcher)
    + stat_smooth(se=False, size=8)
    + geom_point(alpha=0.2)
)
Out[21]:
In [ ]: