In [1]:
%load_ext rpy2.ipython

2.3: Classical confidence intervals

CI for continuous data, Pg 18


In [2]:
%%R
y <- c(35,34,38,35,37)
y


[1] 35 34 38 35 37

In [3]:
%%R
n <- length(y)
n


[1] 5

In [4]:
%%R
estimate <- mean(y)
estimate


[1] 35.8

In [5]:
%%R
se <- sd(y)/sqrt(n)
se


[1] 0.7348469

In [6]:
%%R
int.50 <- estimate + qt(c(.25,.75),n-1)*se
int.50


[1] 35.2557 36.3443

In [7]:
%%R
int.95 <- estimate + qt(c(.025,.975),n-1)*se
int.95


[1] 33.75974 37.84026

CI for proportions, Pg 18


In [8]:
%%R
y <- 700
y


[1] 700

In [9]:
%%R
n <- 1000
n


[1] 1000

In [10]:
%%R
estimate <- y/n
estimate


[1] 0.7

In [11]:
%%R
se <- sqrt(estimate*(1-estimate)/n)
se


[1] 0.01449138

In [12]:
%%R
int.95 <- estimate + qnorm(c(.025,.975))*se
int.95


[1] 0.6715974 0.7284026

CI for discrete data, Pg 18


In [13]:
%%R
y <- rep(c(0,1,2,3,4), c(600,300,50,30,20))
y


   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [593] 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [630] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [667] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [704] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [741] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [778] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [815] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [852] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [889] 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [926] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3
 [963] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
[1000] 4

In [14]:
%%R
n <- length(y)
n


[1] 1000

In [15]:
%%R
estimate <- mean(y)
estimate


[1] 0.57

In [16]:
%%R
se <- sd(y)/sqrt(n)
se


[1] 0.02767428

In [17]:
%%R
int.50 <- estimate + qt(c(.25,.75),n-1)*se
int.50


[1] 0.5513272 0.5886728

In [18]:
%%R
int.95 <- estimate + qt(c(.025,.975),n-1)*se
int.95


[1] 0.5156936 0.6243064

Plot Figure 2.3, Pg 19


In [19]:
%%R
# Data is available in death.polls directory of ARM_Data
# Note: data seems to come from Gallup
# See: http://www.gallup.com/poll/1606/Death-Penalty.aspx
#
# Columns are
# 1- year
# 2- month
# 3- percentage support
# 4- percentage against
# 5- percentage no opinion
polls <- matrix(scan("../../ARM_Data/death.polls/polls.dat"),
                ncol=5, byrow=TRUE)
polls


/home/cstrelioff/.local/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Read 160 items

  res = super(Function, self).__call__(*new_args, **new_kwargs)
      [,1] [,2] [,3] [,4] [,5]
 [1,] 2002 10.0   70   25    5
 [2,] 2002  5.0   72   25    3
 [3,] 2001 10.0   68   26    6
 [4,] 2001  5.0   65   27    8
 [5,] 2001  2.0   67   25    8
 [6,] 2000  8.0   67   28    5
 [7,] 2000  6.0   66   26    8
 [8,] 2000  2.0   66   28    6
 [9,] 1999  5.0   71   22    7
[10,] 1995  9.0   77   13   10
[11,] 1994  9.0   80   16    4
[12,] 1991  6.0   76   18    6
[13,] 1988  9.5   79   16    5
[14,] 1988  9.0   79   16    5
[15,] 1986  1.0   70   22    8
[16,] 1985  9.0   75   17    8
[17,] 1985  1.0   72   20    8
[18,] 1981  1.5   66   25    9
[19,] 1978  3.0   62   27   11
[20,] 1976  4.0   66   26    8
[21,] 1972 11.0   57   32   11
[22,] 1972  3.0   50   41    9
[23,] 1971 10.5   49   40   11
[24,] 1969  1.0   51   40    9
[25,] 1967  6.0   54   38    8
[26,] 1966  5.0   42   47   11
[27,] 1965  1.0   45   43   12
[28,] 1960  3.0   53   36   11
[29,] 1957  8.5   47   34   18
[30,] 1956  3.5   53   34   13
[31,] 1953  9.0   68   25    7
[32,] 1937 12.0   60   33    7

In [20]:
%%R
# Note: this is the percentage support **among those that have an opinion** (pg 18)
#
# -- "No opinion" percentages are ignored
# -- !s a result, our plot below looks different from the Gallup plot
support <- polls[,3]/(polls[,3]+polls[,4])
support


 [1] 0.7368421 0.7422680 0.7234043 0.7065217 0.7282609 0.7052632 0.7173913
 [8] 0.7021277 0.7634409 0.8555556 0.8333333 0.8085106 0.8315789 0.8315789
[15] 0.7608696 0.8152174 0.7826087 0.7252747 0.6966292 0.7173913 0.6404494
[22] 0.5494505 0.5505618 0.5604396 0.5869565 0.4719101 0.5113636 0.5955056
[29] 0.5802469 0.6091954 0.7311828 0.6451613

In [21]:
%%R
year <-  polls[,1] + (polls[,2]-6)/12
year


 [1] 2002.333 2001.917 2001.333 2000.917 2000.667 2000.167 2000.000 1999.667
 [9] 1998.917 1995.250 1994.250 1991.000 1988.292 1988.250 1985.583 1985.250
[17] 1984.583 1980.625 1977.750 1975.833 1972.417 1971.750 1971.375 1968.583
[25] 1967.000 1965.917 1964.583 1959.750 1957.208 1955.792 1953.250 1937.500

In [22]:
%%R
# confidence intervals in plot assume sample size N=1000
# itervals are 68% confidence
par (mar=c(5,5,4,2)+.1)

plot(year, support*100, xlab="Year", ylim=c(min(100*support)-1, max(100*support)+1),
     ylab="Percentage support for the death penalty", cex=1.1, cex.main=1.2,
     cex.axis=1.1, cex.lab=1.1, pch=20)

for (i in 1:nrow(polls))
  lines (rep(year[i],2), 100*(support[i]+c(-1,1)*sqrt(support[i]*(1-support[i])/1000)))


Weighted averages, Pg 19

The example R-code for this part is incomplete, so I'll make up N, p and se loosely related to the text on page 19.


In [23]:
%%R
N <- c(66030000, 81083600, 60788845)
p <- c(0.55, 0.61, 0.38)
se <- c(0.02, 0.03, 0.03)

In [24]:
%%R
w.avg <- sum(N*p)/sum(N)
w.avg


[1] 0.523694

In [25]:
%%R
se.w.avg <- sqrt(sum((N*se/sum(N))^2))
se.w.avg


[1] 0.01594324

In [26]:
%%R
# this uses +/- 2 std devs
int.95 <- w.avg + c(-2,2)*se.w.avg
int.95


[1] 0.4918075 0.5555805

CI using simulations, Pg 20


In [27]:
%%R
n.men <- 500
n.men


[1] 500

In [28]:
%%R
p.hat.men <- 0.75
p.hat.men


[1] 0.75

In [29]:
%%R
se.men <- sqrt (p.hat.men*(1-p.hat.men)/n.men)
se.men


[1] 0.01936492

In [30]:
%%R
n.women <- 500
n.women


[1] 500

In [31]:
%%R
p.hat.women <- 0.65
p.hat.women


[1] 0.65

In [32]:
%%R
se.women <- sqrt(p.hat.women*(1-p.hat.women)/n.women)
se.women


[1] 0.02133073

In [33]:
%%R
n.sims <- 10000
n.sims


[1] 10000

In [34]:
%%R
p.men <- rnorm(n.sims, p.hat.men, se.men)
p.men[1:10] # show first ten


 [1] 0.7582202 0.7340700 0.7643722 0.7663267 0.7596531 0.7326290 0.7715561
 [8] 0.7452970 0.7648430 0.7681984

In [35]:
%%R
p.women <- rnorm(n.sims, p.hat.women, se.women)
p.women[1:10] # show first ten


 [1] 0.6646226 0.6686387 0.6650403 0.6642646 0.6767543 0.6412165 0.6456576
 [8] 0.6405765 0.6822134 0.6074093

In [36]:
%%R
ratio <- p.men/p.women
ratio[1:10] # show first ten


 [1] 1.140828 1.097857 1.149362 1.153647 1.122495 1.142561 1.194993 1.163478
 [9] 1.121120 1.264713

In [37]:
%%R
int.95 <- quantile(ratio, c(.025,.975))
int.95


    2.5%    97.5% 
1.064455 1.255405