Adapted from Analyzing Baseball Data with R, by Marchi and Albert (CRC Press)



In [1]:

    
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

DATA_DIR = '../data/lahman-csv_2015-01-24/'



In [2]:

    
teams = pd.read_csv(DATA_DIR+'Teams.csv')
teams.head()









    Out[2]:






  
    
      
      yearID
      lgID
      teamID
      franchID
      divID
      Rank
      G
      Ghome
      W
      L
      ...
      DP
      FP
      name
      park
      attendance
      BPF
      PPF
      teamIDBR
      teamIDlahman45
      teamIDretro
    
  
  
    
      0
      1871
      NaN
      BS1
      BNA
      NaN
      3
      31
      NaN
      20
      10
      ...
      NaN
      0.83
      Boston Red Stockings
      South End Grounds I
      NaN
      103
      98
      BOS
      BS1
      BS1
    
    
      1
      1871
      NaN
      CH1
      CNA
      NaN
      2
      28
      NaN
      19
      9
      ...
      NaN
      0.82
      Chicago White Stockings
      Union Base-Ball Grounds
      NaN
      104
      102
      CHI
      CH1
      CH1
    
    
      2
      1871
      NaN
      CL1
      CFC
      NaN
      8
      29
      NaN
      10
      19
      ...
      NaN
      0.81
      Cleveland Forest Citys
      National Association Grounds
      NaN
      96
      100
      CLE
      CL1
      CL1
    
    
      3
      1871
      NaN
      FW1
      KEK
      NaN
      7
      19
      NaN
      7
      12
      ...
      NaN
      0.80
      Fort Wayne Kekiongas
      Hamilton Field
      NaN
      101
      107
      KEK
      FW1
      FW1
    
    
      4
      1871
      NaN
      NY2
      NNA
      NaN
      5
      33
      NaN
      16
      17
      ...
      NaN
      0.83
      New York Mutuals
      Union Grounds (Brooklyn)
      NaN
      90
      88
      NYU
      NY2
      NY2
    
  

5 rows × 48 columns



In [178]:

    
since_2001 = teams.loc[teams.yearID > 2000, ['teamID', 'yearID', 'lgID', 'G', 'W', 'L', 'R', 'RA']]
since_2001.head()



In [179]:

    
since_2001 = since_2001.assign(RD=since_2001.R - since_2001.RA, Wpct=since_2001.W / (since_2001.W + since_2001.L))



In [180]:

    
axes = sns.regplot(since_2001.RD, since_2001.Wpct)
axes.set_xlabel('Run differential')
axes.set_ylabel('Winning percentage')









    Out[180]:





<matplotlib.text.Text at 0x114cc5470>

Bayesian regression using PyMC3:



In [244]:

    
import pymc3 as pm

with pm.Model() as run_model:
    
    pm.glm.glm('Wpct ~ RD', since_2001)
    trace = pm.sample(1000)
    pm.traceplot(trace, vars=['RD'])
    pm.summary(trace)









    



Assigned NUTS to Intercept
Assigned NUTS to RD
Assigned NUTS to sd_log
 [-----------------100%-----------------] 1000 of 1000 complete in 5.4 sec
Intercept:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.499            0.021            0.001            [0.498, 0.502]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.497          0.499          0.500          0.501          0.502


RD:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.001            0.000            0.000            [0.001, 0.001]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.001          0.001          0.001          0.001          0.001


sd_log:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  -3.652           0.292            0.028            [-3.779, -3.618]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -3.763         -3.724         -3.691         -3.664         -3.081


sd:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.029            0.044            0.003            [0.023, 0.027]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.023          0.024          0.025          0.026          0.046



In [245]:

    
with pm.Model() as run_model_resid:
    
    β = pm.Normal('β', 0, 1e-6, shape=2)
    μ = β[0] + β[1] * since_2001.RD
    σ = pm.Uniform('σ', 0, 100)
    
    wp = pm.Normal('wp', μ, σ**-2, observed=since_2001.Wpct)
    
    ϵ = pm.Deterministic('ϵ', since_2001.Wpct.values - μ)
    
    trace = pm.sample(2000)









    



Assigned NUTS to β
Assigned NUTS to σ_interval
 [-----------------100%-----------------] 2000 of 2000 complete in 52.4 sec



In [246]:

    
pm.summary(trace[1000:], vars=['β'], roundto=6)









    



β:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.500048         0.001280         0.000035         [0.497660, 0.502642]
  0.000631         0.000012         0.000000         [0.000609, 0.000653]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.497619       0.499232       0.500027       0.500872       0.502616
  0.000607       0.000622       0.000631       0.000639       0.000652



In [247]:

    
pm.forestplot(trace[1000:], vars=['ϵ'], ylabels=['']*len(since_2001))









    Out[247]:





<matplotlib.gridspec.GridSpec at 0x116defa20>



In [248]:

    
axes = sns.regplot(since_2001.RD, trace['ϵ'][1000:].mean(axis=0))
axes.set_ylabel('Residual')
axes.set_xlabel('Run differential')









    Out[248]:





<matplotlib.text.Text at 0x11b9c8828>

Using scikit-learn:



In [182]:

    
from sklearn import linear_model

Wpct = since_2001.Wpct.values
RD = since_2001.RD.reshape(-1,1)

regr = linear_model.LinearRegression()
regr.fit(RD, Wpct, since_2001.Wpct)









    Out[182]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)



In [183]:

    
regr.coef_









    Out[183]:





array([ 0.00062709])



In [184]:

    
Wpct_hat = regr.predict(RD)
ϵ = Wpct - Wpct_hat



In [185]:

    
axes = sns.regplot(since_2001.RD, ϵ)
axes.set_ylabel('Residual')
axes.set_xlabel('Run differential')









    Out[185]:





<matplotlib.text.Text at 0x115620da0>



In [186]:

    
regr.score(RD, Wpct)









    Out[186]:





0.88003554784382143

Root mean squared error (RMSE)



In [187]:

    
rmse = np.sqrt((ϵ**2).mean())



In [188]:

    
(np.abs(ϵ) < rmse).mean()









    Out[188]:





0.69285714285714284



In [189]:

    
(np.abs(ϵ) < 2*rmse).mean()









    Out[189]:





0.95714285714285718

Pythagorean wins for SF



In [243]:

    
R, RA = since_2001[['R', 'RA']].T.values
since_2001 = since_2001.assign(pythWins=162 * R**2 / (RA**2 + R**2))



In [191]:

    
since_2001[(since_2001.teamID=='SFN') & (since_2001.yearID==2011)]

Load 2011 game logs



In [192]:

    
games_2011 = pd.read_csv('../data/gl2011.txt', header=None)
games_2011.columns = pd.read_csv('../data/game_log_header.txt', header=None).values[0]
games_2011.head()









    Out[192]:






  
    
      
      Date
      DoubleHeader
      DayOfWeek
      VisitingTeam
      VisitingTeamLeague
      VisitingTeamGameNumber
      HomeTeam
      HomeTeamLeague
      HomeTeamGameNumber
      VisitorRunsScored
      ...
      HomeBatting7Name
      HomeBatting7Position
      HomeBatting8PlayerID
      HomeBatting8Name
      HomeBatting8Position
      HomeBatting9PlayerID
      HomeBatting9Name
      HomeBatting9Position
      AdditionalInfo
      AcquisitionInfo
    
  
  
    
      0
      20110331
      0
      Thu
      MIL
      NL
      1
      CIN
      NL
      1
      6
      ...
      Ramon Hernandez
      2
      janip001
      Paul Janish
      6
      volqe001
      Edinson Volquez
      1
      NaN
      Y
    
    
      1
      20110331
      0
      Thu
      SFN
      NL
      1
      LAN
      NL
      1
      1
      ...
      Rod Barajas
      2
      carrj001
      Jamey Carroll
      4
      kersc001
      Clayton Kershaw
      1
      NaN
      Y
    
    
      2
      20110331
      0
      Thu
      SDN
      NL
      1
      SLN
      NL
      1
      5
      ...
      Yadier Molina
      2
      schus001
      Skip Schumaker
      4
      carpc002
      Chris Carpenter
      1
      NaN
      Y
    
    
      3
      20110331
      0
      Thu
      ATL
      NL
      1
      WAS
      NL
      1
      2
      ...
      Danny Espinosa
      4
      rodri001
      Ivan Rodriguez
      2
      hernl003
      Livan Hernandez
      1
      NaN
      Y
    
    
      4
      20110331
      0
      Thu
      ANA
      AL
      1
      KCA
      AL
      1
      4
      ...
      Alcides Escobar
      6
      tream001
      Matt Treanor
      2
      getzc001
      Chris Getz
      4
      NaN
      Y
    
  

5 rows × 161 columns



In [193]:

    
SF2011 = games_2011.loc[(games_2011.HomeTeam=='SFN') | (games_2011.VisitingTeam=='SFN'), 
                   ("VisitingTeam", "HomeTeam", "VisitorRunsScored", "HomeRunsScore")]
SF2011









    Out[193]:






  
    
      
      VisitingTeam
      HomeTeam
      VisitorRunsScored
      HomeRunsScore
    
  
  
    
      1
      SFN
      LAN
      1
      2
    
    
      9
      SFN
      LAN
      3
      4
    
    
      21
      SFN
      LAN
      10
      0
    
    
      35
      SFN
      LAN
      5
      7
    
    
      58
      SFN
      SDN
      1
      3
    
    
      72
      SFN
      SDN
      8
      4
    
    
      97
      SLN
      SFN
      4
      5
    
    
      111
      SLN
      SFN
      2
      3
    
    
      127
      SLN
      SFN
      6
      1
    
    
      139
      LAN
      SFN
      6
      1
    
    
      149
      LAN
      SFN
      4
      5
    
    
      163
      LAN
      SFN
      3
      4
    
    
      182
      SFN
      ARI
      5
      2
    
    
      196
      SFN
      ARI
      5
      3
    
    
      210
      SFN
      ARI
      5
      6
    
    
      228
      SFN
      COL
      8
      1
    
    
      238
      SFN
      COL
      6
      3
    
    
      253
      SFN
      COL
      2
      10
    
    
      283
      ATL
      SFN
      4
      1
    
    
      296
      ATL
      SFN
      5
      2
    
    
      311
      ATL
      SFN
      9
      6
    
    
      334
      SFN
      PIT
      3
      2
    
    
      347
      SFN
      PIT
      0
      2
    
    
      359
      SFN
      PIT
      5
      2
    
    
      375
      SFN
      WAS
      0
      3
    
    
      390
      SFN
      WAS
      2
      1
    
    
      405
      SFN
      WAS
      2
      5
    
    
      417
      SFN
      WAS
      0
      2
    
    
      425
      SFN
      NYN
      7
      6
    
    
      441
      SFN
      NYN
      2
      0
    
    
      ...
      ...
      ...
      ...
      ...
    
    
      1968
      HOU
      SFN
      1
      2
    
    
      1981
      HOU
      SFN
      4
      3
    
    
      1996
      CHN
      SFN
      7
      0
    
    
      2010
      CHN
      SFN
      5
      2
    
    
      2025
      CHN
      SFN
      0
      4
    
    
      2049
      ARI
      SFN
      2
      6
    
    
      2064
      ARI
      SFN
      7
      2
    
    
      2079
      ARI
      SFN
      4
      1
    
    
      2094
      SFN
      SDN
      7
      2
    
    
      2110
      SFN
      SDN
      6
      4
    
    
      2125
      SFN
      SDN
      1
      3
    
    
      2148
      LAN
      SFN
      2
      1
    
    
      2163
      LAN
      SFN
      3
      0
    
    
      2178
      LAN
      SFN
      1
      8
    
    
      2194
      SDN
      SFN
      3
      8
    
    
      2206
      SDN
      SFN
      2
      3
    
    
      2221
      SDN
      SFN
      1
      3
    
    
      2230
      SFN
      COL
      8
      5
    
    
      2242
      SFN
      COL
      9
      1
    
    
      2257
      SFN
      COL
      6
      5
    
    
      2272
      SFN
      COL
      12
      5
    
    
      2300
      SFN
      LAN
      1
      2
    
    
      2317
      SFN
      LAN
      8
      5
    
    
      2329
      SFN
      LAN
      2
      8
    
    
      2338
      SFN
      ARI
      1
      3
    
    
      2351
      SFN
      ARI
      2
      15
    
    
      2368
      SFN
      ARI
      2
      5
    
    
      2391
      COL
      SFN
      1
      3
    
    
      2406
      COL
      SFN
      0
      7
    
    
      2421
      COL
      SFN
      6
      3
    
  

162 rows × 4 columns

Calculate SF score differences



In [194]:

    
SF2011 = SF2011.assign(ScoreDiff=np.where(SF2011.HomeTeam=='SFN', SF2011.HomeRunsScore - SF2011.VisitorRunsScored, 
                                 SF2011.VisitorRunsScored - SF2011.HomeRunsScore))
SF2011 = SF2011.assign(Outcome=(SF2011.ScoreDiff > 0).replace({True:'win', False:'loss'}))



In [195]:

    
SF2011.groupby('Outcome').apply(lambda x: x.ScoreDiff.describe())



In [196]:

    
SF2011.hist(column='ScoreDiff', by='Outcome')









    Out[196]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x112605198>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x116135ef0>], dtype=object)

Losses were decided by larger margin than wins, leading to overperformance versus Pythagorean formula.

Pythagorean wins exponent model



In [253]:

    
with pm.Model() as pythag_model:
    
    exp = pm.Uniform('exp', 0, 5)
    
    pyth_wins = pm.Deterministic('pyth_wins', 162 * R**exp / (RA**exp + R**exp))
    
    wins = pm.Poisson('wins', pyth_wins, observed=since_2001.W.values)
    
    trace = pm.sample(2000)









    



Assigned NUTS to exp_interval
 [-----------------100%-----------------] 2000 of 2000 complete in 1.9 sec



In [254]:

    
pm.traceplot(trace, vars=['exp'])









    Out[254]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x120324dd8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11db84d68>]], dtype=object)



In [255]:

    
pm.summary(trace, vars=['exp'])









    



exp:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  1.893            0.079            0.003            [1.741, 2.040]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  1.744          1.842          1.892          1.941          2.046

One-run games



In [203]:

    
results = games_2011[['VisitingTeam', 'HomeTeam', 'VisitorRunsScored', 'HomeRunsScore']]



In [204]:

    
results = results.assign(Winner=results.HomeTeam.where(results.HomeRunsScore > results.VisitorRunsScored, 
                                                       results.VisitingTeam),
                        Difference=np.abs(results.VisitorRunsScored - results.HomeRunsScore))



In [205]:

    
results.head()









    Out[205]:






  
    
      
      VisitingTeam
      HomeTeam
      VisitorRunsScored
      HomeRunsScore
      Difference
      Winner
    
  
  
    
      0
      MIL
      CIN
      6
      7
      1
      CIN
    
    
      1
      SFN
      LAN
      1
      2
      1
      LAN
    
    
      2
      SDN
      SLN
      5
      3
      2
      SDN
    
    
      3
      ATL
      WAS
      2
      0
      2
      ATL
    
    
      4
      ANA
      KCA
      4
      2
      2
      ANA



In [206]:

    
one_run_games = results.query('Difference==1').Winner.value_counts()
one_run_games.name = 'OneRunGames'

Merge with residuals table



In [207]:

    
one_run_games.head()









    Out[207]:





SFN    33
CLE    30
MIL    30
DET    29
CIN    29
Name: OneRunGames, dtype: int64



In [208]:

    
teams_2011 = since_2001.query('yearID==2011').set_index('teamID').join(one_run_games)



In [211]:

    
teams_2011 = teams_2011.assign(pythResid=teams_2011.W - teams_2011.pythWins)



In [212]:

    
sns.regplot(teams_2011.OneRunGames, teams_2011.pythResid, 'ro')









    Out[212]:





<matplotlib.axes._subplots.AxesSubplot at 0x116c442e8>

Relationship between one-run wins and relief pitching



In [213]:

    
pitching = pd.read_csv('../data/lahman-csv_2015-01-24/Pitching.csv')
pitching.head()









    Out[213]:






  
    
      
      playerID
      yearID
      stint
      teamID
      lgID
      W
      L
      G
      GS
      CG
      ...
      IBB
      WP
      HBP
      BK
      BFP
      GF
      R
      SH
      SF
      GIDP
    
  
  
    
      0
      bechtge01
      1871
      1
      PH1
      NaN
      1
      2
      3
      3
      2
      ...
      NaN
      NaN
      NaN
      0
      NaN
      NaN
      42
      NaN
      NaN
      NaN
    
    
      1
      brainas01
      1871
      1
      WS3
      NaN
      12
      15
      30
      30
      30
      ...
      NaN
      NaN
      NaN
      0
      NaN
      NaN
      292
      NaN
      NaN
      NaN
    
    
      2
      fergubo01
      1871
      1
      NY2
      NaN
      0
      0
      1
      0
      0
      ...
      NaN
      NaN
      NaN
      0
      NaN
      NaN
      9
      NaN
      NaN
      NaN
    
    
      3
      fishech01
      1871
      1
      RC1
      NaN
      4
      16
      24
      24
      22
      ...
      NaN
      NaN
      NaN
      0
      NaN
      NaN
      257
      NaN
      NaN
      NaN
    
    
      4
      fleetfr01
      1871
      1
      NY2
      NaN
      0
      1
      1
      1
      1
      ...
      NaN
      NaN
      NaN
      0
      NaN
      NaN
      21
      NaN
      NaN
      NaN
    
  

5 rows × 30 columns



In [216]:

    
top_closers = pitching.query('GF > 50 & ERA < 2.5')[["playerID", "yearID", "teamID"]]



In [223]:

    
top_closers[top_closers.yearID==2011].set_index('teamID')









    Out[223]:






  
    
      
      playerID
      yearID
    
    
      teamID
      
      
    
  
  
    
      MIL
      axforjo01
      2011
    
    
      SDN
      bellhe01
      2011
    
    
      CIN
      cordefr01
      2011
    
    
      TBA
      farnsky01
      2011
    
    
      PIT
      hanrajo01
      2011
    
    
      ATL
      kimbrcr01
      2011
    
    
      ARI
      putzjj01
      2011
    
    
      NYA
      riverma01
      2011
    
    
      DET
      valvejo01
      2011



In [234]:

    
teams_2011.head()









    Out[234]:






  
    
      
      yearID
      lgID
      G
      W
      L
      R
      RA
      RD
      Wpct
      pythWins
      OneRunGames
      pythResid
    
    
      teamID
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      ARI
      2011
      NL
      162
      94
      68
      731
      662
      69
      0.580247
      89.004768
      28
      4.995232
    
    
      ATL
      2011
      NL
      162
      89
      73
      641
      605
      36
      0.549383
      85.676674
      29
      3.323326
    
    
      BAL
      2011
      AL
      162
      69
      93
      708
      860
      -152
      0.425926
      65.442118
      22
      3.557882
    
    
      BOS
      2011
      AL
      162
      90
      72
      875
      737
      138
      0.555556
      94.767588
      19
      -4.767588
    
    
      CHA
      2011
      AL
      162
      79
      83
      654
      706
      -52
      0.487654
      74.814925
      24
      4.185075



In [241]:

    
top_closers_merged = top_closers[top_closers.yearID==2011].set_index('teamID').join(teams_2011, lsuffix='_a', rsuffix='_b')



In [242]:

    
top_closers_merged.pythResid.describe()









    Out[242]:





count    9.000000
mean     0.464949
std      5.166120
min     -7.578614
25%     -3.669926
50%      3.323326
75%      4.995232
max      5.802142
Name: pythResid, dtype: float64



In [ ]:

	yearID	lgID	teamID	franchID	divID	Rank	G	Ghome	W	L	...	DP	FP	name	park	attendance	BPF	PPF	teamIDBR	teamIDlahman45	teamIDretro
0	1871	NaN	BS1	BNA	NaN	3	31	NaN	20	10	...	NaN	0.83	Boston Red Stockings	South End Grounds I	NaN	103	98	BOS	BS1	BS1
1	1871	NaN	CH1	CNA	NaN	2	28	NaN	19	9	...	NaN	0.82	Chicago White Stockings	Union Base-Ball Grounds	NaN	104	102	CHI	CH1	CH1
2	1871	NaN	CL1	CFC	NaN	8	29	NaN	10	19	...	NaN	0.81	Cleveland Forest Citys	National Association Grounds	NaN	96	100	CLE	CL1	CL1
3	1871	NaN	FW1	KEK	NaN	7	19	NaN	7	12	...	NaN	0.80	Fort Wayne Kekiongas	Hamilton Field	NaN	101	107	KEK	FW1	FW1
4	1871	NaN	NY2	NNA	NaN	5	33	NaN	16	17	...	NaN	0.83	New York Mutuals	Union Grounds (Brooklyn)	NaN	90	88	NYU	NY2	NY2

	teamID	yearID	lgID	G	W	L	R	RA
2355	ANA	2001	AL	162	75	87	691	730
2356	ARI	2001	NL	162	92	70	818	677
2357	ATL	2001	NL	162	88	74	729	643
2358	BAL	2001	AL	162	63	98	687	829
2359	BOS	2001	AL	161	82	79	772	745

	Date	DayOfWeek	VisitingTeam	VisitingTeamLeague	VisitingTeamGameNumber	HomeTeam	HomeTeamLeague	HomeTeamGameNumber	VisitorRunsScored	...	HomeBatting7Name	HomeBatting7Position	HomeBatting8PlayerID	HomeBatting8Name	HomeBatting8Position	HomeBatting9PlayerID	HomeBatting9Name	HomeBatting9Position	AdditionalInfo	AcquisitionInfo
0	20110331	Thu	MIL	NL	1	CIN	NL	1	6	...	Ramon Hernandez	2	janip001	Paul Janish	6	volqe001	Edinson Volquez	1	NaN	Y
1	20110331	Thu	SFN	NL	1	LAN	NL	1	1	...	Rod Barajas	2	carrj001	Jamey Carroll	4	kersc001	Clayton Kershaw	1	NaN	Y
2	20110331	Thu	SDN	NL	1	SLN	NL	1	5	...	Yadier Molina	2	schus001	Skip Schumaker	4	carpc002	Chris Carpenter	1	NaN	Y
3	20110331	Thu	ATL	NL	1	WAS	NL	1	2	...	Danny Espinosa	4	rodri001	Ivan Rodriguez	2	hernl003	Livan Hernandez	1	NaN	Y
4	20110331	Thu	ANA	AL	1	KCA	AL	1	4	...	Alcides Escobar	6	tream001	Matt Treanor	2	getzc001	Chris Getz	4	NaN	Y

	VisitingTeam	HomeTeam	VisitorRunsScored	HomeRunsScore
1	SFN	LAN	1	2
9	SFN	LAN	3	4
21	SFN	LAN	10	0
35	SFN	LAN	5	7
58	SFN	SDN	1	3
72	SFN	SDN	8	4
97	SLN	SFN	4	5
111	SLN	SFN	2	3
127	SLN	SFN	6	1
139	LAN	SFN	6	1
149	LAN	SFN	4	5
163	LAN	SFN	3	4
182	SFN	ARI	5	2
196	SFN	ARI	5	3
210	SFN	ARI	5	6
228	SFN	COL	8	1
238	SFN	COL	6	3
253	SFN	COL	2	10
283	ATL	SFN	4	1
296	ATL	SFN	5	2
311	ATL	SFN	9	6
334	SFN	PIT	3	2
347	SFN	PIT	0	2
359	SFN	PIT	5	2
375	SFN	WAS	0	3
390	SFN	WAS	2	1
405	SFN	WAS	2	5
417	SFN	WAS	0	2
425	SFN	NYN	7	6
441	SFN	NYN	2	0
...	...	...	...	...
1968	HOU	SFN	1	2
1981	HOU	SFN	4	3
1996	CHN	SFN	7	0
2010	CHN	SFN	5	2
2025	CHN	SFN	0	4
2049	ARI	SFN	2	6
2064	ARI	SFN	7	2
2079	ARI	SFN	4	1
2094	SFN	SDN	7	2
2110	SFN	SDN	6	4
2125	SFN	SDN	1	3
2148	LAN	SFN	2	1
2163	LAN	SFN	3	0
2178	LAN	SFN	1	8
2194	SDN	SFN	3	8
2206	SDN	SFN	2	3
2221	SDN	SFN	1	3
2230	SFN	COL	8	5
2242	SFN	COL	9	1
2257	SFN	COL	6	5
2272	SFN	COL	12	5
2300	SFN	LAN	1	2
2317	SFN	LAN	8	5
2329	SFN	LAN	2	8
2338	SFN	ARI	1	3
2351	SFN	ARI	2	15
2368	SFN	ARI	2	5
2391	COL	SFN	1	3
2406	COL	SFN	0	7
2421	COL	SFN	6	3

ScoreDiff	count	mean	std	min	25%	50%	75%	max
Outcome
loss	76	-3.223684	2.436384	-13	-5	-3	-1	-1
win	86	2.755814	2.233099	1	1	2	3	12

	VisitingTeam	HomeTeam	VisitorRunsScored	HomeRunsScore	Difference	Winner
0	MIL	CIN	6	7	1	CIN
1	SFN	LAN	1	2	1	LAN
2	SDN	SLN	5	3	2	SDN
3	ATL	WAS	2	0	2	ATL
4	ANA	KCA	4	2	2	ANA

	playerID	yearID	stint	teamID	lgID	W	L	G	GS	CG	...	IBB	WP	HBP	BFP	GF	R	SH	SF	GIDP
0	bechtge01	1871	1	PH1	NaN	1	2	3	3	2	...	NaN	NaN	NaN	NaN	NaN	42	NaN	NaN	NaN
1	brainas01	1871	1	WS3	NaN	12	15	30	30	30	...	NaN	NaN	NaN	NaN	NaN	292	NaN	NaN	NaN
2	fergubo01	1871	1	NY2	NaN	0	0	1	0	0	...	NaN	NaN	NaN	NaN	NaN	9	NaN	NaN	NaN
3	fishech01	1871	1	RC1	NaN	4	16	24	24	22	...	NaN	NaN	NaN	NaN	NaN	257	NaN	NaN	NaN
4	fleetfr01	1871	1	NY2	NaN	0	1	1	1	1	...	NaN	NaN	NaN	NaN	NaN	21	NaN	NaN	NaN

	playerID	yearID
teamID
MIL	axforjo01	2011
SDN	bellhe01	2011
CIN	cordefr01	2011
TBA	farnsky01	2011
PIT	hanrajo01	2011
ATL	kimbrcr01	2011
ARI	putzjj01	2011
NYA	riverma01	2011
DET	valvejo01	2011

	yearID	lgID	G	W	L	R	RA	RD	Wpct	pythWins	OneRunGames	pythResid
teamID
ARI	2011	NL	162	94	68	731	662	69	0.580247	89.004768	28	4.995232
ATL	2011	NL	162	89	73	641	605	36	0.549383	85.676674	29	3.323326
BAL	2011	AL	162	69	93	708	860	-152	0.425926	65.442118	22	3.557882
BOS	2011	AL	162	90	72	875	737	138	0.555556	94.767588	19	-4.767588
CHA	2011	AL	162	79	83	654	706	-52	0.487654	74.814925	24	4.185075

	VisitingTeam	HomeTeam	VisitorRunsScored	HomeRunsScore	Difference	Winner
0	MIL	CIN	6	7	1	CIN
1	SFN	LAN	1	2	1	LAN
2	SDN	SLN	5	3	2	SDN
3	ATL	WAS	2	0	2	ATL
4	ANA	KCA	4	2	2	ANA

	VisitingTeam	HomeTeam	VisitorRunsScored	HomeRunsScore	Difference	Winner
0	MIL	CIN	6	7	1	CIN
1	SFN	LAN	1	2	1	LAN
2	SDN	SLN	5	3	2	SDN
3	ATL	WAS	2	0	2	ATL
4	ANA	KCA	4	2	2	ANA