In [1]:
# Download the OpenIntro evaluations workspace and load its objects into the
# current environment; the cells below rely on the `evals` data frame it provides.
load(file = url(description = "http://www.openintro.org/stat/data/evals.RData"))

In [2]:
# Per-column overview of the data: five-number summaries for numeric columns,
# level counts for factors.
summary(object = evals)


Out[2]:
     score                 rank            ethnicity      gender   
 Min.   :2.300   teaching    :102   minority    : 64   female:195  
 1st Qu.:3.800   tenure track:108   not minority:399   male  :268  
 Median :4.300   tenured     :253                                  
 Mean   :4.175                                                     
 3rd Qu.:4.600                                                     
 Max.   :5.000                                                     
        language        age        cls_perc_eval     cls_did_eval   
 english    :435   Min.   :29.00   Min.   : 10.42   Min.   :  5.00  
 non-english: 28   1st Qu.:42.00   1st Qu.: 62.70   1st Qu.: 15.00  
                   Median :48.00   Median : 76.92   Median : 23.00  
                   Mean   :48.37   Mean   : 74.43   Mean   : 36.62  
                   3rd Qu.:57.00   3rd Qu.: 87.25   3rd Qu.: 40.00  
                   Max.   :73.00   Max.   :100.00   Max.   :380.00  
  cls_students    cls_level      cls_profs         cls_credits   bty_f1lower   
 Min.   :  8.00   lower:157   multiple:306   multi credit:436   Min.   :1.000  
 1st Qu.: 19.00   upper:306   single  :157   one credit  : 27   1st Qu.:2.000  
 Median : 29.00                                                 Median :4.000  
 Mean   : 55.18                                                 Mean   :3.963  
 3rd Qu.: 60.00                                                 3rd Qu.:5.000  
 Max.   :581.00                                                 Max.   :8.000  
  bty_f1upper     bty_f2upper      bty_m1lower     bty_m1upper   
 Min.   :1.000   Min.   : 1.000   Min.   :1.000   Min.   :1.000  
 1st Qu.:4.000   1st Qu.: 4.000   1st Qu.:2.000   1st Qu.:3.000  
 Median :5.000   Median : 5.000   Median :3.000   Median :4.000  
 Mean   :5.019   Mean   : 5.214   Mean   :3.413   Mean   :4.147  
 3rd Qu.:7.000   3rd Qu.: 6.000   3rd Qu.:5.000   3rd Qu.:5.000  
 Max.   :9.000   Max.   :10.000   Max.   :7.000   Max.   :9.000  
  bty_m2upper       bty_avg           pic_outfit        pic_color  
 Min.   :1.000   Min.   :1.667   formal    : 77   black&white: 78  
 1st Qu.:4.000   1st Qu.:3.167   not formal:386   color      :385  
 Median :5.000   Median :4.333                                     
 Mean   :4.752   Mean   :4.418                                     
 3rd Qu.:6.000   3rd Qu.:5.500                                     
 Max.   :9.000   Max.   :8.167                                     

In [5]:
# Distribution of the response variable `score` across all rows of `evals`.
# The default title and x-axis label are derived from the expression passed in.
hist(evals$score)



In [8]:
# Scatterplot of score against bty_avg. Both variables are jittered so that
# observations sharing the same (bty_avg, score) pair do not overplot.
# Explicit labels replace the default "jitter(evals$...)" axis text so the
# figure can be read on its own.
plot(jitter(evals$score) ~ jitter(evals$bty_avg),
     xlab = "bty_avg (jittered)",
     ylab = "score (jittered)",
     main = "score vs. bty_avg")



In [14]:
# Simple linear regression of score on bty_avg.
m_bty <- lm(score ~ bty_avg, data = evals)

# Draw the data first, then overlay the fitted line. abline() adds to whichever
# plot is currently active, so it has to follow a score-vs-bty_avg scatterplot;
# calling it after plot(m_bty) would put the line on the last lm diagnostic
# panel, whose axes are not bty_avg and score. Residual diagnostics for this
# model are produced separately in later cells.
plot(jitter(evals$score) ~ jitter(evals$bty_avg),
     xlab = "bty_avg (jittered)",
     ylab = "score (jittered)",
     main = "score vs. bty_avg with least-squares line")
abline(m_bty)



In [15]:
# Coefficient table, residual summary and R-squared for the score ~ bty_avg fit.
summary(object = m_bty)


Out[15]:
Call:
lm(formula = score ~ bty_avg, data = evals)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9246 -0.3690  0.1420  0.3977  0.9309 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.88034    0.07614   50.96  < 2e-16 ***
bty_avg      0.06664    0.01629    4.09 5.08e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5348 on 461 degrees of freedom
Multiple R-squared:  0.03502,	Adjusted R-squared:  0.03293 
F-statistic: 16.73 on 1 and 461 DF,  p-value: 5.083e-05

In [16]:
# Compare the averaged beauty column with one of the individual rating columns:
# first visually, then via their Pearson correlation.
plot(evals$bty_avg ~ evals$bty_f1lower)
with(evals, cor(x = bty_avg, y = bty_f1lower))


Out[16]:
[1] 0.8439112

In [22]:
# Residuals of m_bty against the predictor bty_avg; used to look for
# non-linearity or non-constant spread in the residuals.
plot(m_bty$residuals ~ evals$bty_avg)



In [26]:
# Residual diagnostics for m_bty.
# Shape of the residual distribution:
hist(m_bty$residuals)
# Normal Q-Q plot of the residuals, with a reference line added to it:
qqnorm(m_bty$residuals)
qqline(m_bty$residuals)
# Residuals plotted against their index (row order in the fitted data):
plot(m_bty$residuals)



In [27]:
# Pairwise scatterplots of the beauty-rating columns. The columns are selected
# by name rather than by position (13:19) so the cell keeps working if the
# column order of `evals` ever changes; these seven names are the columns that
# occupy positions 13 through 19 in the summary(evals) output.
plot(evals[, c("bty_f1lower", "bty_f1upper", "bty_f2upper",
               "bty_m1lower", "bty_m1upper", "bty_m2upper",
               "bty_avg")])



In [19]:
# Add gender to the beauty-only model and report the fit.
m_bty_gen <- lm(
  formula = score ~ bty_avg + gender,
  data = evals
)
summary(object = m_bty_gen)


Out[19]:
Call:
lm(formula = score ~ bty_avg + gender, data = evals)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8305 -0.3625  0.1055  0.4213  0.9314 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.74734    0.08466  44.266  < 2e-16 ***
bty_avg      0.07416    0.01625   4.563 6.48e-06 ***
gendermale   0.17239    0.05022   3.433 0.000652 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5287 on 460 degrees of freedom
Multiple R-squared:  0.05912,	Adjusted R-squared:  0.05503 
F-statistic: 14.45 on 2 and 460 DF,  p-value: 8.177e-07

In [28]:
# Visualise the score ~ bty_avg + gender model.
# NOTE(review): multiLines() is not defined in this notebook; presumably it is
# supplied by the evals.RData workspace loaded in the first cell -- confirm.
multiLines(m_bty_gen)



In [30]:
# Beauty plus rank: `rank` is a three-level factor, so lm() reports two
# indicator coefficients for it relative to the baseline level.
m_bty_rank <- lm(
  formula = score ~ bty_avg + rank,
  data = evals
)
summary(object = m_bty_rank)


Out[30]:
Call:
lm(formula = score ~ bty_avg + rank, data = evals)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8713 -0.3642  0.1489  0.4103  0.9525 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)       3.98155    0.09078  43.860  < 2e-16 ***
bty_avg           0.06783    0.01655   4.098 4.92e-05 ***
ranktenure track -0.16070    0.07395  -2.173   0.0303 *  
ranktenured      -0.12623    0.06266  -2.014   0.0445 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5328 on 459 degrees of freedom
Multiple R-squared:  0.04652,	Adjusted R-squared:  0.04029 
F-statistic: 7.465 on 3 and 459 DF,  p-value: 6.88e-05

In [31]:
# Full model: every candidate predictor considered in this notebook.
m_full <- lm(
  formula = score ~ rank + ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits +
    bty_avg,
  data = evals
)
summary(object = m_full)


Out[31]:
Call:
lm(formula = score ~ rank + ethnicity + gender + language + age + 
    cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits + 
    bty_avg, data = evals)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.84482 -0.31367  0.08559  0.35732  1.10105 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)            3.5305036  0.2408200  14.660  < 2e-16 ***
ranktenure track      -0.1070121  0.0820250  -1.305 0.192687    
ranktenured           -0.0450371  0.0652185  -0.691 0.490199    
ethnicitynot minority  0.1869649  0.0775329   2.411 0.016290 *  
gendermale             0.1786166  0.0515346   3.466 0.000579 ***
languagenon-english   -0.1268254  0.1080358  -1.174 0.241048    
age                   -0.0066498  0.0030830  -2.157 0.031542 *  
cls_perc_eval          0.0056996  0.0015514   3.674 0.000268 ***
cls_students           0.0004455  0.0003585   1.243 0.214596    
cls_levelupper         0.0187105  0.0555833   0.337 0.736560    
cls_profssingle       -0.0085751  0.0513527  -0.167 0.867458    
cls_creditsone credit  0.5087427  0.1170130   4.348  1.7e-05 ***
bty_avg                0.0612651  0.0166755   3.674 0.000268 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.504 on 450 degrees of freedom
Multiple R-squared:  0.1635,	Adjusted R-squared:  0.1412 
F-statistic: 7.331 on 12 and 450 DF,  p-value: 2.406e-12

In [32]:
# Refit without cls_profs. Note that this rebinds `m_full` to the reduced model.
m_full <- lm(
  formula = score ~ rank + ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_credits +
    bty_avg,
  data = evals
)
summary(object = m_full)


Out[32]:
Call:
lm(formula = score ~ rank + ethnicity + gender + language + age + 
    cls_perc_eval + cls_students + cls_level + cls_credits + 
    bty_avg, data = evals)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.85048 -0.31394  0.08052  0.35956  1.10356 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)            3.5286297  0.2402990  14.684  < 2e-16 ***
ranktenure track      -0.1073638  0.0819096  -1.311 0.190606    
ranktenured           -0.0453744  0.0651169  -0.697 0.486278    
ethnicitynot minority  0.1893718  0.0760992   2.488 0.013189 *  
gendermale             0.1780270  0.0513581   3.466 0.000578 ***
languagenon-english   -0.1265737  0.1079088  -1.173 0.241427    
age                   -0.0066619  0.0030788  -2.164 0.031006 *  
cls_perc_eval          0.0056790  0.0015448   3.676 0.000265 ***
cls_students           0.0004493  0.0003573   1.257 0.209319    
cls_levelupper         0.0183743  0.0554870   0.331 0.740687    
cls_creditsone credit  0.5109162  0.1161614   4.398 1.36e-05 ***
bty_avg                0.0611497  0.0166432   3.674 0.000267 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5035 on 451 degrees of freedom
Multiple R-squared:  0.1635,	Adjusted R-squared:  0.1431 
F-statistic: 8.012 on 11 and 451 DF,  p-value: 8.303e-13

In [38]:
# Backward-selection candidate: the full model with `rank` removed.
# Only the adjusted R-squared is reported.
m1 <- lm(
  formula = score ~ ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits +
    bty_avg,
  data = evals
)
summary(m1)[["adj.r.squared"]]


Out[38]:
[1] 0.1417823

In [36]:
# Backward-selection candidate: the full model with both `rank` and
# `ethnicity` removed. Only the adjusted R-squared is reported.
# The predictor is `cls_students`; the previous spelling `cbls_students` does
# not match any column of `evals`, so lm() could not evaluate the formula.
# NOTE(review): re-run this cell to confirm the stored output.
m2 <- lm(score ~ gender + language + age + cls_perc_eval
             + cls_students + cls_level + cls_profs + cls_credits + bty_avg, data = evals)
summary(m2)$adj.r.squared


Out[36]:
[1] 0.1309549

In [43]:
# Baseline for the comparisons that follow: refit the full model and report
# its adjusted R-squared.
m_full <- lm(
  formula = score ~ rank + ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits +
    bty_avg,
  data = evals
)
summary(m_full)[["adj.r.squared"]]


Out[43]:
[1] 0.1412172

In [44]:
# Candidate removal: bty_avg (all other full-model terms are kept).
m3 <- lm(
  formula = score ~ rank + ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits,
  data = evals
)
summary(m3)[["adj.r.squared"]]


Out[44]:
[1] 0.1174189

In [45]:
# Candidate removal: cls_profs (all other full-model terms are kept).
m4 <- lm(
  formula = score ~ rank + ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_credits +
    bty_avg,
  data = evals
)
summary(m4)[["adj.r.squared"]]


Out[45]:
[1] 0.1430683

In [47]:
# Candidate removal: cls_students (all other full-model terms are kept).
m5 <- lm(
  formula = score ~ rank + ethnicity + gender + language + age +
    cls_perc_eval + cls_level + cls_profs + cls_credits +
    bty_avg,
  data = evals
)
summary(m5)[["adj.r.squared"]]


Out[47]:
[1] 0.1401804

In [48]:
# Candidate removal: rank (all other full-model terms are kept).
# This is the same formula as the earlier m1 fit.
m6 <- lm(
  formula = score ~ ethnicity + gender + language + age +
    cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits +
    bty_avg,
  data = evals
)
summary(m6)[["adj.r.squared"]]


Out[48]:
[1] 0.1417823

In [ ]: