In [1]:
# Fetch the OpenIntro mlb11 workspace and load its objects into the session.
# The connection is held in a variable so it can be closed explicitly rather
# than being left for the garbage collector to clean up.
# NOTE(review): plot_ss() is called later without being defined in this
# notebook, so it presumably comes from this workspace as well - confirm.
mlb11_con <- url("http://www.openintro.org/stat/data/mlb11.RData")
load(mlb11_con)
close(mlb11_con)

In [4]:
# Peek at the first six rows to see the available columns.
head(mlb11, n = 6L)


Out[4]:
                 team runs at_bats hits homeruns bat_avg strikeouts
1       Texas Rangers  855    5659 1599      210   0.283        930
2      Boston Red Sox  875    5710 1600      203   0.280       1108
3      Detroit Tigers  787    5563 1540      169   0.277       1143
4  Kansas City Royals  730    5672 1560      129   0.275       1006
5 St. Louis Cardinals  762    5532 1513      162   0.273        978
6       New York Mets  718    5600 1477      108   0.264       1085
  stolen_bases wins new_onbase new_slug new_obs
1          143   96      0.340    0.460   0.800
2          102   90      0.349    0.461   0.810
3           49   95      0.340    0.434   0.773
4          153   71      0.329    0.415   0.744
5           57   90      0.341    0.425   0.766
6          130   77      0.335    0.391   0.725

In [6]:
# Per-column summary of the data frame.
summary(object = mlb11)


Out[6]:
                   team         runs          at_bats          hits     
 Arizona Diamondbacks: 1   Min.   :556.0   Min.   :5417   Min.   :1263  
 Atlanta Braves      : 1   1st Qu.:629.0   1st Qu.:5448   1st Qu.:1348  
 Baltimore Orioles   : 1   Median :705.5   Median :5516   Median :1394  
 Boston Red Sox      : 1   Mean   :693.6   Mean   :5524   Mean   :1409  
 Chicago Cubs        : 1   3rd Qu.:734.0   3rd Qu.:5575   3rd Qu.:1441  
 Chicago White Sox   : 1   Max.   :875.0   Max.   :5710   Max.   :1600  
 (Other)             :24                                                
    homeruns        bat_avg         strikeouts    stolen_bases   
 Min.   : 91.0   Min.   :0.2330   Min.   : 930   Min.   : 49.00  
 1st Qu.:118.0   1st Qu.:0.2447   1st Qu.:1085   1st Qu.: 89.75  
 Median :154.0   Median :0.2530   Median :1140   Median :107.00  
 Mean   :151.7   Mean   :0.2549   Mean   :1150   Mean   :109.30  
 3rd Qu.:172.8   3rd Qu.:0.2602   3rd Qu.:1248   3rd Qu.:130.75  
 Max.   :222.0   Max.   :0.2830   Max.   :1323   Max.   :170.00  
                                                                 
      wins          new_onbase        new_slug         new_obs      
 Min.   : 56.00   Min.   :0.2920   Min.   :0.3480   Min.   :0.6400  
 1st Qu.: 72.00   1st Qu.:0.3110   1st Qu.:0.3770   1st Qu.:0.6920  
 Median : 80.00   Median :0.3185   Median :0.3985   Median :0.7160  
 Mean   : 80.97   Mean   :0.3205   Mean   :0.3988   Mean   :0.7191  
 3rd Qu.: 90.00   3rd Qu.:0.3282   3rd Qu.:0.4130   3rd Qu.:0.7382  
 Max.   :102.00   Max.   :0.3490   Max.   :0.4610   Max.   :0.8100  
                                                                    

In [7]:
library(ggplot2)

In [11]:
# Scatterplot of runs against at-bats. The formula/data form plus explicit
# labels keeps the axes readable instead of showing raw `mlb11$...` expressions.
plot(runs ~ at_bats, data = mlb11,
     xlab = "At bats", ylab = "Runs",
     main = "Runs vs. at-bats (mlb11)")



In [12]:
# Correlation coefficient between runs and at-bats.
with(mlb11, cor(runs, at_bats))


Out[12]:
[1] 0.610627

In [13]:
# Call plot_ss() with at-bats as x and runs as y; its printed output reports
# an lm fit of y on x and a sum of squares.
# NOTE(review): plot_ss is not defined in this notebook - presumably it is
# provided by the loaded mlb11.RData workspace; confirm.
plot_ss(x = mlb11$at_bats, y = mlb11$runs)


                                
Call:
lm(formula = y ~ x, data = pts)

Coefficients:
(Intercept)            x  
 -2789.2429       0.6305  

Sum of Squares:  123721.9

In [14]:
# Same plot_ss() call as before, now with showSquares enabled.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
plot_ss(x = mlb11$at_bats, y = mlb11$runs, showSquares = TRUE)


                                
Call:
lm(formula = y ~ x, data = pts)

Coefficients:
(Intercept)            x  
 -2789.2429       0.6305  

Sum of Squares:  123721.9

In [15]:
# Simple linear regression of runs on at-bats; reused by the diagnostics below.
ml <- lm(formula = runs ~ at_bats, data = mlb11)

In [16]:
# Coefficients, residual spread and R-squared for the runs ~ at_bats fit.
summary(object = ml)


Out[16]:
Call:
lm(formula = runs ~ at_bats, data = mlb11)

Residuals:
    Min      1Q  Median      3Q     Max 
-125.58  -47.05  -16.59   54.40  176.87 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2789.2429   853.6957  -3.267 0.002871 ** 
at_bats         0.6305     0.1545   4.080 0.000339 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 66.47 on 28 degrees of freedom
Multiple R-squared:  0.3729,	Adjusted R-squared:  0.3505 
F-statistic: 16.65 on 1 and 28 DF,  p-value: 0.0003388

In [17]:
# Fit runs on home runs and report the model summary in one step.
summary(lm(formula = runs ~ homeruns, data = mlb11))


Out[17]:
Call:
lm(formula = runs ~ homeruns, data = mlb11)

Residuals:
    Min      1Q  Median      3Q     Max 
-91.615 -33.410   3.231  24.292 104.631 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 415.2389    41.6779   9.963 1.04e-10 ***
homeruns      1.8345     0.2677   6.854 1.90e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 51.29 on 28 degrees of freedom
Multiple R-squared:  0.6266,	Adjusted R-squared:  0.6132 
F-statistic: 46.98 on 1 and 28 DF,  p-value: 1.9e-07

In [19]:
# Scatterplot of runs against at-bats with the fitted line from `ml` overlaid.
# Explicit labels replace the default raw `mlb11$...` axis text.
plot(runs ~ at_bats, data = mlb11,
     xlab = "At bats", ylab = "Runs",
     main = "Runs vs. at-bats with fitted line")
abline(ml)


To check for linearity


In [20]:
# Residuals of `ml` against at-bats, with a dotted reference line at zero.
# residuals() is the accessor for the fit; labels are set explicitly so the
# axes do not show raw R expressions.
plot(residuals(ml) ~ mlb11$at_bats,
     xlab = "At bats", ylab = "Residuals",
     main = "Residuals vs. at-bats (runs ~ at_bats)")
abline(h = 0, lty = 3)


Nearly normal residuals


In [21]:
# Histogram of the residuals of `ml`, with a readable title and x-axis label
# in place of the default "Histogram of ml$residuals".
hist(residuals(ml),
     xlab = "Residuals",
     main = "Histogram of residuals (runs ~ at_bats)")



In [24]:
# Normal Q-Q plot of the residuals of `ml`, plus its reference line.
qqnorm(residuals(ml))
qqline(residuals(ml))



In [27]:
# Model summary for runs ~ at_bats, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ at_bats, data = mlb11))
plot(runs ~ at_bats, data = mlb11,
     xlab = "At bats", ylab = "Runs",
     main = "Runs vs. at-bats")


Out[27]:
Call:
lm(formula = runs ~ at_bats, data = mlb11)

Residuals:
    Min      1Q  Median      3Q     Max 
-125.58  -47.05  -16.59   54.40  176.87 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2789.2429   853.6957  -3.267 0.002871 ** 
at_bats         0.6305     0.1545   4.080 0.000339 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 66.47 on 28 degrees of freedom
Multiple R-squared:  0.3729,	Adjusted R-squared:  0.3505 
F-statistic: 16.65 on 1 and 28 DF,  p-value: 0.0003388

In [28]:
# Model summary for runs ~ hits, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ hits, data = mlb11))
plot(runs ~ hits, data = mlb11,
     xlab = "Hits", ylab = "Runs",
     main = "Runs vs. hits")


Out[28]:
Call:
lm(formula = runs ~ hits, data = mlb11)

Residuals:
     Min       1Q   Median       3Q      Max 
-103.718  -27.179   -5.233   19.322  140.693 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -375.5600   151.1806  -2.484   0.0192 *  
hits           0.7589     0.1071   7.085 1.04e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 50.23 on 28 degrees of freedom
Multiple R-squared:  0.6419,	Adjusted R-squared:  0.6292 
F-statistic:  50.2 on 1 and 28 DF,  p-value: 1.043e-07

In [29]:
# Model summary for runs ~ wins, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ wins, data = mlb11))
plot(runs ~ wins, data = mlb11,
     xlab = "Wins", ylab = "Runs",
     main = "Runs vs. wins")


Out[29]:
Call:
lm(formula = runs ~ wins, data = mlb11)

Residuals:
     Min       1Q   Median       3Q      Max 
-145.450  -47.506   -7.482   47.346  142.186 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  342.121     89.223   3.834 0.000654 ***
wins           4.341      1.092   3.977 0.000447 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 67.1 on 28 degrees of freedom
Multiple R-squared:  0.361,	Adjusted R-squared:  0.3381 
F-statistic: 15.82 on 1 and 28 DF,  p-value: 0.0004469

In [30]:
# Model summary for runs ~ bat_avg, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ bat_avg, data = mlb11))
plot(runs ~ bat_avg, data = mlb11,
     xlab = "Batting average (bat_avg)", ylab = "Runs",
     main = "Runs vs. bat_avg")


Out[30]:
Call:
lm(formula = runs ~ bat_avg, data = mlb11)

Residuals:
    Min      1Q  Median      3Q     Max 
-94.676 -26.303  -5.496  28.482 131.113 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -642.8      183.1  -3.511  0.00153 ** 
bat_avg       5242.2      717.3   7.308 5.88e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 49.23 on 28 degrees of freedom
Multiple R-squared:  0.6561,	Adjusted R-squared:  0.6438 
F-statistic: 53.41 on 1 and 28 DF,  p-value: 5.877e-08

In [31]:
# Model summary for runs ~ new_obs, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ new_obs, data = mlb11))
plot(runs ~ new_obs, data = mlb11,
     xlab = "new_obs", ylab = "Runs",
     main = "Runs vs. new_obs")


Out[31]:
Call:
lm(formula = runs ~ new_obs, data = mlb11)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.456 -13.690   1.165  13.935  41.156 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -686.61      68.93  -9.962 1.05e-10 ***
new_obs      1919.36      95.70  20.057  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 21.41 on 28 degrees of freedom
Multiple R-squared:  0.9349,	Adjusted R-squared:  0.9326 
F-statistic: 402.3 on 1 and 28 DF,  p-value: < 2.2e-16

In [32]:
# Model summary for runs ~ new_slug, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ new_slug, data = mlb11))
plot(runs ~ new_slug, data = mlb11,
     xlab = "new_slug", ylab = "Runs",
     main = "Runs vs. new_slug")


Out[32]:
Call:
lm(formula = runs ~ new_slug, data = mlb11)

Residuals:
   Min     1Q Median     3Q    Max 
-45.41 -18.66  -0.91  16.29  52.29 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -375.80      68.71   -5.47 7.70e-06 ***
new_slug     2681.33     171.83   15.61 2.42e-15 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 26.96 on 28 degrees of freedom
Multiple R-squared:  0.8969,	Adjusted R-squared:  0.8932 
F-statistic: 243.5 on 1 and 28 DF,  p-value: 2.42e-15

In [33]:
# Model summary for runs ~ new_onbase, followed by the matching scatterplot.
# The plot uses the formula/data form with explicit labels so the axes are
# readable on their own.
summary(lm(runs ~ new_onbase, data = mlb11))
plot(runs ~ new_onbase, data = mlb11,
     xlab = "new_onbase", ylab = "Runs",
     main = "Runs vs. new_onbase")


Out[33]:
Call:
lm(formula = runs ~ new_onbase, data = mlb11)

Residuals:
    Min      1Q  Median      3Q     Max 
-58.270 -18.335   3.249  19.520  69.002 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1118.4      144.5  -7.741 1.97e-08 ***
new_onbase    5654.3      450.5  12.552 5.12e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 32.61 on 28 degrees of freedom
Multiple R-squared:  0.8491,	Adjusted R-squared:  0.8437 
F-statistic: 157.6 on 1 and 28 DF,  p-value: 5.116e-13

In [ ]: