In [1]:
from ggplot import *
import pandas as pd
import numpy as np
In [ ]:
# IPython magic: render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
In [2]:
# Columns kept for the rest of the notebook; any other CSV column is dropped.
pitch_columns = [
    'pitch_time', 'inning', 'pitcher_name', 'hitter_name', 'pitch_type',
    'sz_top', 'sz_bottom',
    'px', 'pz', 'pitch_name', 'start_speed', 'end_speed', 'type_confidence',
]
# Read the pitch CSV and narrow it to the selected columns, in the order listed above.
df = pd.read_csv("./baseball-pitches-clean.csv")[pitch_columns]
# Preview the first rows.
df.head()
Out[2]:
In [3]:
# Jittered scatter of every pitch: px on the x-axis, pz on the y-axis, colored by pitch_type.
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=df)
    + geom_jitter()
)
Out[3]:
In [4]:
# Same jittered scatter, with pitch_name additionally mapped to the marker shape.
(
    ggplot(aes(x='px', y='pz', color='pitch_type', shape='pitch_name'), data=df)
    + geom_jitter()
)
Out[4]:
In [3]:
# The ten hitter names that appear most often in the data, with their row counts.
df['hitter_name'].value_counts().head(10)
Out[3]:
In [4]:
# Keep only the rows whose hitter_name is 'Mike Trout'.
is_selected_hitter = df['hitter_name'] == 'Mike Trout'
hitter = df[is_selected_hitter]
# Preview the first rows of the subset.
hitter.head()
Out[4]:
In [5]:
# Scatter of the pitches in `hitter` (px vs. pz), colored by pitch_type.
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=hitter)
    + geom_point()
)
Out[5]:
In [6]:
# Same scatter, with the color scale replaced by scale_color_brewer() at its default settings.
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=hitter)
    + geom_point()
    + scale_color_brewer()
)
Out[6]:
In [8]:
# Same scatter, using brewer palette number 4 of type 'qual' for the pitch_type colors.
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=hitter)
    + geom_point()
    + scale_color_brewer(type='qual', palette=4)
)
Out[8]:
In [6]:
# Horizontal reference lines at the mean sz_bottom and mean sz_top of this hitter's rows
# (presumably the bottom and top of the strike zone; confirm against the data dictionary).
zone_heights = [hitter.sz_bottom.mean(), hitter.sz_top.mean()]
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=hitter)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='red')
)
Out[6]:
In [7]:
# Reference lines: horizontal at the mean sz_bottom / sz_top, vertical at px = -1 and px = 1.
zone_heights = [hitter.sz_bottom.mean(), hitter.sz_top.mean()]
zone_sides = [-1, 1]
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=hitter)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='red')
    + geom_vline(xintercept=zone_sides, color='red')
)
Out[7]:
In [8]:
# Same plot with the four red reference lines, plus coord_equal() so both axes share one scale.
zone_heights = [hitter.sz_bottom.mean(), hitter.sz_top.mean()]
zone_sides = [-1, 1]
(
    ggplot(aes(x='px', y='pz', color='pitch_type'), data=hitter)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='red')
    + geom_vline(xintercept=zone_sides, color='red')
    + coord_equal()
)
Out[8]:
In [9]:
# Keep only the rows whose pitch_name is 'Fastball'.
is_fastball = df['pitch_name'] == 'Fastball'
fastballs = df[is_fastball]
# Preview the first rows of the subset.
fastballs.head()
Out[9]:
In [10]:
# start_speed vs. end_speed for the first 10,000 fastball rows, colored on a continuous
# gradient by the difference start_speed - end_speed.
fastball_sample = fastballs.head(10000)
(
    ggplot(aes(x='start_speed', y='end_speed', color='start_speed - end_speed'), data=fastball_sample)
    + geom_point()
    + scale_color_gradient()
)
Out[10]:
In [12]:
# Same speed plot with a blue-to-red gradient, a reference line, and equal axis scaling.
# NOTE(review): geom_abline is given intercept=10 and no slope; confirm that the library's
# default slope and a positive intercept are what is intended for this reference line.
fastball_sample = fastballs.head(10000)
(
    ggplot(aes(x='start_speed', y='end_speed', color='start_speed - end_speed'), data=fastball_sample)
    + geom_point()
    + scale_color_gradient(low="blue", high="red")
    + geom_abline(intercept=10)
    + coord_equal()
)
Out[12]:
In [13]:
# Select the rows whose pitcher_name is "C.J. Wilson".
# .copy() makes this an independent frame: a later cell adds a column to `pitcher`, and
# writing into a plain boolean-mask slice of `df` raises pandas' SettingWithCopyWarning.
pitcher = df[df.pitcher_name == "C.J. Wilson"].copy()
In [70]:
# Pitch map for the selected pitcher: location at home plate, colored yellow-to-red by start_speed.
(
    ggplot(aes(x='px', y='pz', color='start_speed'), data=pitcher)
    + geom_point()
    + scale_color_gradient(low='yellow', high='red')
    + coord_equal()
    + xlab("Horizontal Position at Home Plate")
    + ylab("Vertical Position at Home Plate")
    + ggtitle("C.J. Wilson Pitch Map")
)
Out[70]:
In [14]:
# Pitch locations colored white-to-black by start_speed, with pitch_name mapped to marker shape.
(
    ggplot(aes(x='px', y='pz', color='start_speed', shape='pitch_name'), data=pitcher)
    + geom_point()
    + scale_color_gradient(low='white', high='black')
    + coord_equal()
)
Out[14]:
In [16]:
def normalize_pitch(speed):
    """Min-max scale a Series of speeds so the slowest maps to 0 and the fastest to 1.

    Parameters
    ----------
    speed : pandas.Series
        Numeric speeds (anything exposing ``.min()``, ``.max()`` and
        element-wise arithmetic like a Series).

    Returns
    -------
    pandas.Series
        ``(speed - min) / (max - min)``, carrying the same index as ``speed``.
        When every value is identical (``max == min``) the result is all
        zeros instead of the NaN that a 0/0 division would produce.
    """
    low = speed.min()
    span = speed.max() - low
    if span == 0:
        # Only one distinct speed: there is no range to scale by, so report 0.0
        # rather than dividing zero by zero.
        return (speed - low).astype(float)
    return (speed - low) / span
# Add pitch_speed_norm: start_speed passed through normalize_pitch separately for each
# (pitcher_name, pitch_name) group.
# transform() always returns a result indexed like `pitcher`, whereas apply() on recent pandas
# prefixes the group keys to the index and no longer aligns on assignment.
# assign() builds a new frame instead of writing a column into what may be a slice of `df`.
pitcher = pitcher.assign(
    pitch_speed_norm=pitcher.groupby(["pitcher_name", "pitch_name"])["start_speed"].transform(normalize_pitch)
)
In [71]:
# One panel per pitch_name, each point colored yellow-to-red by pitch_speed_norm.
# Blue reference lines: horizontal at the mean sz_bottom / sz_top over all of `pitcher`,
# vertical at px = -1 and px = 1.
zone_heights = [pitcher.sz_bottom.mean(), pitcher.sz_top.mean()]
zone_sides = [-1, 1]
(
    ggplot(aes(x='px', y='pz', color='pitch_speed_norm'), data=pitcher)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='blue')
    + geom_vline(xintercept=zone_sides, color='blue')
    + scale_color_gradient(low='yellow', high='red')
    + facet_wrap("pitch_name", scales="fixed")
    + ggtitle("C.J. Wilson Normalized Pitch Speeds")
)
Out[71]:
In [68]:
# Normalized-speed plot restricted to rows whose pitch_name is "Slider".
# The reference lines still use means taken over every row of `pitcher`, not just the sliders.
sliders = pitcher[pitcher.pitch_name == "Slider"]
zone_heights = [pitcher.sz_bottom.mean(), pitcher.sz_top.mean()]
zone_sides = [-1, 1]
(
    ggplot(aes(x='px', y='pz', color='pitch_speed_norm'), data=sliders)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='blue')
    + geom_vline(xintercept=zone_sides, color='blue')
    + scale_color_gradient(low='yellow', high='red')
    + facet_wrap("pitch_name", scales="fixed")
)
Out[68]:
In [72]:
# Slider-only normalized-speed plot, drawn with theme_seaborn().
# The reference lines use means taken over every row of `pitcher`, not just the sliders.
sliders = pitcher[pitcher.pitch_name == "Slider"]
zone_heights = [pitcher.sz_bottom.mean(), pitcher.sz_top.mean()]
zone_sides = [-1, 1]
(
    ggplot(aes(x='px', y='pz', color='pitch_speed_norm'), data=sliders)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='blue')
    + geom_vline(xintercept=zone_sides, color='blue')
    + scale_color_gradient(low='yellow', high='red')
    + facet_wrap("pitch_name", scales="fixed")
    + theme_seaborn()
)
Out[72]:
In [73]:
# Slider-only normalized-speed plot, drawn with theme_xkcd().
# The reference lines use means taken over every row of `pitcher`, not just the sliders.
sliders = pitcher[pitcher.pitch_name == "Slider"]
zone_heights = [pitcher.sz_bottom.mean(), pitcher.sz_top.mean()]
zone_sides = [-1, 1]
(
    ggplot(aes(x='px', y='pz', color='pitch_speed_norm'), data=sliders)
    + geom_point()
    + geom_hline(yintercept=zone_heights, color='blue')
    + geom_vline(xintercept=zone_sides, color='blue')
    + scale_color_gradient(low='yellow', high='red')
    + facet_wrap("pitch_name", scales="fixed")
    + theme_xkcd()
)
Out[73]:
In [78]:
# Smoothed trend of start_speed against inning over all pitches, drawn in steelblue with theme_xkcd().
(
    ggplot(aes(x='inning', y='start_speed'), data=df)
    + stat_smooth(color='steelblue')
    + theme_xkcd()
)
Out[78]:
In [ ]: