In [1]:
import numpy as np
from selection.truncated import *
import nose.tools as nt
%load_ext rmagic
%pylab inline


Populating the interactive namespace from numpy and matplotlib

This is my attempt to port Will's UMPU to python. First I will see if I can just run it from python:


In [2]:
%%R
### Construct UMAU intervals for a truncated Gaussian

## Simple functions for specifying and checking a "cutoffs" object, which
##   is just a matrix, each of whose rows is a non-empty real interval (infinite endpoints allowed)

check.cutoffs <- function(cutoffs) {
    if(!is.matrix(cutoffs) || dim(cutoffs)[2] != 2) stop("cutoffs should be a matrix with 2 columns")
    if(sum(cutoffs[,2] <= cutoffs[,1]) > 0) stop("all right endpoints should be > left endpoints")
    if(sum(diff(c(t(cutoffs))) <= 0) > 0) stop("endpoints should be strictly increasing")
}

Negation


In [3]:
%%R
negate.cutoffs <- function(cutoffs) {
    -cutoffs[nrow(cutoffs):1,2:1,drop=FALSE]
}

negate.cutoffs(rbind(c(-Inf,-4),c(-3,-2),c(7,Inf)))


     [,1] [,2]
[1,] -Inf   -7
[2,]    2    3
[3,]    4  Inf

In [4]:
tg = truncated_gaussian([(-np.inf,-4), (-3,-2), (7, np.inf)])
tg.negated.intervals


Out[4]:
array([[-inf,  -7.],
       [  2.,   3.],
       [  4.,  inf]])

Two-sided


In [5]:
%%R
two.sided.cutoff <- function(x) rbind(neg=c(-Inf,-abs(x)),pos=c(abs(x),Inf))
two.sided.cutoff(3)


    [,1] [,2]
neg -Inf   -3
pos    3  Inf

In [6]:
three = truncated_gaussian.twosided(3)
three.intervals


Out[6]:
array([[-inf,  -3.],
       [  3.,  inf]])

pnorm.cutoffs


In [7]:
%%R


## Compute Phi(b-mu) - Phi(a-mu) in a numerically robust way
## mu can be a vector
pnorm.interval <- function(mu, ab) {
    ifelse(mean(ab) - mu < 0,
           pnorm(ab[2] - mu) - pnorm(ab[1] - mu),
           pnorm(mu - ab[1]) - pnorm(mu - ab[2]))
}

## Compute Phi(b-mu) - Phi(a-mu) for each [a,b] in S
pnorm.cutoffs <- function(mu, cutoffs) {
    ret <- apply(cutoffs, 1, function(cut) pnorm.interval(mu, cut))
    if(!is.matrix(ret)) ret <- t(ret)
    dimnames(ret) <- list(as.character(mu),row.names(cutoffs))
    ret
}

print(pnorm.cutoffs(-1:1,two.sided.cutoff(3)))
print(pnorm.cutoffs(-1,two.sided.cutoff(3)))


            neg          pos
-1 2.275013e-02 3.167124e-05
0  1.349898e-03 1.349898e-03
1  3.167124e-05 2.275013e-02
          neg          pos
-1 0.02275013 3.167124e-05

In [8]:
for mu in [-1,0,1]:
    three.mu = mu
    print np.array(three.P, np.float128)


[ 0.022750132  3.1671242e-05]
[ 0.001349898  0.001349898]
[ 3.1671242e-05  0.022750132]

dnorm.cutoffs


In [9]:
%%R
## Compute phi(b-mu) - phi(a-mu) for each [a,b] in S
## mu can be a vector
dnorm.cutoffs <- function(mu, cutoffs) {
    ret <- apply(cutoffs, 1, function(cut) dnorm(cut[2] - mu) - dnorm(cut[1] - mu))
    if(!is.matrix(ret)) ret <- t(ret)
    dimnames(ret) <- list(as.character(mu),row.names(cutoffs))
    ret
}

print(dnorm.cutoffs(-1:1,two.sided.cutoff(3)))
print(dnorm.cutoffs(-1,two.sided.cutoff(3)))


            neg           pos
-1 0.0539909665 -0.0001338302
0  0.0044318484 -0.0044318484
1  0.0001338302 -0.0539909665
          neg           pos
-1 0.05399097 -0.0001338302

In [10]:
for mu in [-1,0,1]:
    three.mu = mu
    print np.array(three.D[:,1] - three.D[:,0], np.float128)


[ 0.053990967 -0.00013383023]
[ 0.0044318484 -0.0044318484]
[ 0.00013383023 -0.053990967]

Fmu


In [11]:
%%R

## Compute P_mu(X<x)
## mu CANNOT be a vector, pk is a one-row matrix or vector
## x must be in one of the intervals
F.mu <- function(x, mu, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
    stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk))
    K <- length(pk)
    p <- sum(pk)
    k <- which(x >= cutoffs[,1] & x <= cutoffs[,2])
    stopifnot(length(k)==1)
    (sum(pk[(1:K) < k]) + pnorm.interval(mu, c(cutoffs[k,1], x)) ) / p
}

F.mu(-10.01,0,two.sided.cutoff(10))


[1] 0.4519527

In [12]:
%%R
F.mu(-9,0,two.sided.cutoff(10))


Error: length(k) == 1 is not TRUE
In addition: There were 40 warnings (use warnings() to see them)
Error: length(k) == 1 is not TRUE

In [13]:
%%R
F.mu(10.2,0,two.sided.cutoff(10))


[1] 0.9349487

In [14]:
ten = truncated_gaussian.twosided(10)
print np.array(ten.CDF(-10.01), np.float128)


0.451952749586

In [15]:
nt.assert_raises(ValueError, ten.CDF, (-9,))

In [16]:
print np.array(ten.CDF(10.2), np.float128)


0.934948711667

Fmu.inv and c2


In [17]:
%%R

## Compute the inverse of the previous function
## mu CANNOT be a vector, pk is a one-row matrix or vector
F.inv.mu <- function(F, mu, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
    stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk))
    p <- sum(pk)
    k <- max(which(c(0,cumsum(pk))/p < F))
    pnorm.increment <- p*F - c(0,cumsum(pk))[k]
    if(mean(cutoffs[k,]) < 0)
	mu + qnorm(pnorm(cutoffs[k,1]-mu) + pnorm.increment)
    else
        mu + qnorm(pnorm(cutoffs[k,1]-mu,lower.tail=FALSE) - pnorm.increment,lower.tail=FALSE)
}

        ## Compute c2(c1) for a single c1
## mu CANNOT be a vector, pk is a one-row matrix or vector
c2.single <- function(c1, mu, alpha, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
    stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk))
    K <- length(pk)
    
    alpha1 <- F.mu(c1, mu, cutoffs, pk)
    if(alpha1 > alpha) return(NA)

    alpha2 <- alpha-alpha1
    return(F.inv.mu(1-alpha2, mu, cutoffs, pk))
}

c2.single(-10.3, 0, .05, two.sided.cutoff(10))
F.mu(-10.3, 0, two.sided.cutoff(10))

## Do the same, for a vector of c1 and mu
c2 <- function(c1, mu, alpha, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
    sapply(1:length(c1),function(i)
           c2.single(c1[i], mu[i], alpha, cutoffs,
                     pk[i,,drop=FALSE]))
}
c2(-10.3, 0, .05, two.sided.cutoff(10))


     pos 
10.28548 

In [18]:
ten.mu = 0
print np.array(ten.right_endpoint(-10.3, 0.05), np.float128)


10.2854765288

g.mu


In [19]:
%%R

## Compute g_mu(c1) for a single mu and c1 (see LaTeX documentation)
## c1 and mu CANNOT be vectors, pk is NOT a matrix
g.mu.single <- function(c1, mu, alpha, cutoffs,
                        pk=pnorm.cutoffs(mu, cutoffs), dk=dnorm.cutoffs(mu, cutoffs)) {
    const <- (1-alpha) * (sum(-dk) + mu * sum(pk))
    cc2 <- c2(c1, mu, alpha, cutoffs, pk)
    if(is.na(cc2)) return(Inf)

    K <- length(pk)
    p <- sum(pk)
    k1 <- which(c1 >= cutoffs[,1] & c1 <= cutoffs[,2])
    stopifnot(length(k1)==1)
    k2 <- which(cc2 >= cutoffs[,1] & cc2 <= cutoffs[,2])
    stopifnot(length(k2)==1)

    if(k1 < k2) {
        sum(-dk[(1:K) > k1 & (1:K) < k2]) + mu * sum(pk[(1:K) > k1 & (1:K) < k2]) +
            - dnorm(cutoffs[k1,2] - mu) + dnorm(c1 - mu) - dnorm(cc2 - mu) + dnorm(cutoffs[k2,1] - mu) +
                mu * (pnorm.interval(mu,c(c1,cutoffs[k1,2])) + pnorm.interval(mu,c(cutoffs[k2,1], cc2))) -
                    const
    } else {
        - dnorm(cc2 - mu) + dnorm(c1 - mu) + mu * pnorm.interval(mu,c(c1,cc2)) - const
    }
}
## Compute g_mu(c1) for a vector of mu and c1 (see LaTeX documentation)
g.mu <- function(c1, mu, alpha, cutoffs,
                 pk=pnorm.cutoffs(mu, cutoffs), dk=dnorm.cutoffs(mu, cutoffs)) {
    sapply(1:length(c1),function(i)
           g.mu.single(c1[i], mu[i], alpha, cutoffs,
                       pk[i,,drop=FALSE], dk[i,,drop=FALSE]))
}

           
    
g.mu(-10.3,1,0.05, two.sided.cutoff(10))


          neg 
-1.803609e-21 

In [20]:
ten.mu = 1
print np.array(ten.G(-10.3, 0.05), np.float128)


-1.80360879724e-21

In [21]:
%%R

dnorm.cutoffs(c(0,0),two.sided.cutoff(10))
c.vals <- seq(-10.3,-10.28,.001)
plot(c.vals,g.mu(c.vals,rep(0,length(c.vals)), .05, two.sided.cutoff(10)))
g.mu(c.vals,rep(0,length(c.vals)), .05, two.sided.cutoff(10))


          neg           neg           neg           neg           neg 
-5.905076e-25 -5.147271e-25 -4.381626e-25 -3.608059e-25 -2.826490e-25 
          neg           neg           neg           neg           neg 
-2.036834e-25 -1.239009e-25 -4.329295e-26  3.814891e-26  1.204333e-25 
          neg           neg           neg           neg           neg 
 2.035691e-25  2.875649e-25  3.724297e-25  4.581725e-25  5.448025e-25 
          neg           neg           neg           neg           neg 
 6.323287e-25  7.207605e-25  8.101074e-25  9.003787e-25  9.915841e-25 
          neg 
 1.083733e-24 

In [22]:
right_endpoints = np.arange(-10.3,-10.28,0.001)
mus = np.zeros_like(right_endpoints)
Gvals = G(right_endpoints, mus, 0.05, ten)
f = plt.figure(figsize=(8,8))
ax = f.gca()
ax.plot(right_endpoints, Gvals)
np.array(Gvals)


Out[22]:
array([ -5.90507554e-25,  -5.14727091e-25,  -4.38162587e-25,
        -3.60805941e-25,  -2.82648970e-25,  -2.03683402e-25,
        -1.23900879e-25,  -4.32929536e-26,   3.81489112e-26,
         1.20433343e-25,   2.03569061e-25,   2.87564877e-25,
         3.72429699e-25,   4.58172529e-25,   5.44802465e-25,
         6.32328703e-25,   7.20760541e-25,   8.10107374e-25,
         9.00378701e-25,   9.91584123e-25,   1.08373335e-24])

In [23]:
%%R
g.mu(-10.2925, 0, .05, two.sided.cutoff(10))


          neg 
-2.676801e-27 

In [24]:
ten.mu = 0
print np.array(ten.G(-10.2925,0.05), np.float128)


-2.67680067788e-27

In [25]:
%%R
## Compute g_mu'(c1)
dg.mu <- function(c1, mu, alpha, cutoffs,
                  pk=pnorm.cutoffs(mu, cutoffs)) {
    (c2(c1, mu, alpha, cutoffs, pk) - c1) * dnorm(c1 - mu)
}

In [26]:
approxG = G(right_endpoints-0.001, mus, 0.05, ten) + dG(right_endpoints-0.001, mus, 0.05, ten) * 0.001
ax.scatter(right_endpoints, approxG)
ax.figure


Out[26]:

In [27]:
%%R
mu.vals <- seq(-10,15,.1)
plot(mu.vals, g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10)))
g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10))
g.mu(-10.2925,2, .05, two.sided.cutoff(10))


          neg 
-1.107319e-17 

In [28]:
f = plt.figure(figsize=(8,8))
mus = np.arange(-10,15,0.1)
Gvals = G(np.ones_like(mus)*(-10.2925), mus, 0.05, ten)
plt.plot(mus, Gvals, linewidth=2, color='red')


Out[28]:
[<matplotlib.lines.Line2D at 0x111fab7d0>]

In [29]:
%%R
dg.mu(-10.2925, 8, .05, two.sided.cutoff(10))


         pos 
1.859287e-72 

In [30]:
ten.mu = 8
print np.array(ten.dG(-10.2925, 0.05), np.float128)


1.85928694146e-72

In [31]:
%%R
mu.vals <- seq(-.001,.001,.0001)
plot(mu.vals, g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10)))



In [32]:
mus = np.arange(-0.001,0.001,0.0001)
Gvals = G(-10.2925*np.ones_like(mus), mus, 0.05, ten)
f = plt.figure(figsize=(8,8))
ax = f.gca()
ax.plot(mus, Gvals, linewidth=2, color='red')


Out[32]:
[<matplotlib.lines.Line2D at 0x111929f50>]

umau.normal.unit.var.upper.single


In [33]:
%%R

## Compute upper CI endpoint, for a single x, when sigma=1
umau.normal.unit.var.upper.single <- function(x, cutoffs, alpha=.05, mu.lo=x, mu.hi=x+2, tol=1E-8) {
    check.cutoffs(cutoffs)

    mu.too.low <- function(mu) {
        g.mu(x,mu,alpha,cutoffs) > 0
    }
    mu.too.high <- function(mu) {
        g.mu(x,mu,alpha,cutoffs) < 0
    }

    while(mu.too.high(mu.lo)) {
        mu.hi <- mu.lo
        mu.lo <- mu.lo - 2
    }
    while(mu.too.low(mu.hi)) {
        mu.lo <- mu.hi
        mu.hi <- mu.hi + 2
    }
    while(mu.hi - mu.lo > tol) {
        mu.avg <- (mu.lo + mu.hi) / 2
        if(mu.too.high(mu.avg)) {
            mu.hi <- mu.avg
        } else {
            mu.lo <- mu.avg
        }
    }
    mu.avg
}


print(umau.normal.unit.var.upper.single(-10.29, two.sided.cutoff(10), mu.lo=-1, mu.hi=5))
print(umau.normal.unit.var.upper.single(-10.2925, two.sided.cutoff(10), mu.lo=-1, mu.hi=5))


[1] 0.03622952
[1] -0.0005936157

In [34]:
L, U = UMAU_interval(-10.29, 0.05, ten)
ten.mu = U
print U, ten.G(-10.29, 0.05)
ten_ng = ten.negated
ten_ng.mu = -L
print L, ten_ng.G(10.29, 0.05)


0.0362295201421 -2.03179614112e-32
-11.8430293062 1.35619515618e-10

Intervals


In [35]:
%%R

## Compute both CI endpoints, for a single x
umau.normal.single <- function(x, cutoffs, sigma=1, alpha=.05, mu.lo=x, mu.hi=x+2, tol=1E-8) {
    mu.upper <- sigma * umau.normal.unit.var.upper.single(x/sigma, cutoffs/sigma, alpha,
                                                          mu.lo/sigma, mu.hi/sigma, tol)
    mu.lower <- -sigma * umau.normal.unit.var.upper.single(-x/sigma, negate.cutoffs(cutoffs)/sigma, alpha,
                                                           -mu.hi/sigma, -mu.lo/sigma, tol)
    return(c(mu.lower, mu.upper))
}

LU = umau.normal.single(10.29, two.sided.cutoff(10))
print(umau.normal.single(1+10.29, 1+two.sided.cutoff(10)))
print(umau.normal.single(-10.29, two.sided.cutoff(10)))
print(umau.normal.single(1-10.29, 1+two.sided.cutoff(10)))


[1]  0.9637705 12.8430293
[1] -11.84302931   0.03622952
[1] -10.84303   1.03623

In [36]:
%%R
# check the value of g.mu
LU = umau.normal.single(-10.29, two.sided.cutoff(10))
print(g.mu(10.29,-LU[1],0.05, negate.cutoffs(two.sided.cutoff(10))))
print(g.mu(-10.29,LU[2],0.05, two.sided.cutoff(10)))


         neg 
1.356195e-10 
          neg 
-2.031796e-32 

In [37]:
ten_shift = truncated_gaussian(ten.intervals+1)
L, U = UMAU_interval(1-10.29, 0.05, ten_shift)
print L, U


-10.8430293062 1.03622952014

In [38]:
ten_shift = truncated_gaussian(ten.intervals-1)
L, U = UMAU_interval(-1-10.29, 0.05, ten_shift)
print L, U


-12.8430293062 -0.963770479858

In [39]:
ten_shift = truncated_gaussian(ten.intervals)
L, U = UMAU_interval(10.29, 0.05, ten_shift)
print L, U


-0.0362295201421 11.8430293062

Plots


In [40]:
%%R -o X
X <- c(seq(-20,-14,1),seq(-13,-10.4,.1),seq(-10.3,-10.06,.02),seq(-10.05,-10.01,.01),seq(-10.01,-10,0.02))

In [41]:
intervals = []
for x in X:
    intervals.append(UMAU_interval(x, 0.05, ten))
intervals = np.array(intervals, np.float)

In [42]:
f = plt.figure(figsize=(10,10))
ax = f.gca()
ax.plot(X, np.array(intervals)[:,0], linewidth=2, c='red')
ax.plot(X, np.array(intervals)[:,1], linewidth=2, c='red')
ax.plot(-X, -np.array(intervals)[:,0], linewidth=2, c='red')
ax.plot(-X, -np.array(intervals)[:,1], linewidth=2, c='red')
R = -np.fabs(X).max(), np.fabs(X).max()
ax.plot([R[0],R[1]],[0,0], 'k--')
ax.plot([R[0],R[1]],[R[0],R[1]], 'k--')
ax.plot([-10,-10], [-100, 100], 'k--')
ax.plot([10,10], [-100, 100], 'k--')
ax.set_ylim([-22,22])


Out[42]:
(-22, 22)

In [43]:
ten_mid = truncated_gaussian([(-np.inf,-10),(-.1,0.1),(10,np.inf)])

In [44]:
%%R -o X2
X2 = seq(-0.1,0.,length=5)

In [45]:
mid_intervals = []
for x in X:
    mid_intervals.append(UMAU_interval(x, 0.05, ten_mid))
mid_intervals = np.array(mid_intervals, np.float)

mid_intervals2 = []
for x in X2:
    mid_intervals2.append(UMAU_interval(x, 0.05, ten_mid))
mid_intervals2 = np.array(mid_intervals2, np.float)

In [46]:
f = plt.figure(figsize=(10,10))
ax = f.gca()
for Z, intervals in zip([X,X2],[mid_intervals, mid_intervals2]):
    ax.plot(Z, intervals[:,0], linewidth=2, c='red')
    ax.plot(Z, intervals[:,1], linewidth=2, c='red')
    ax.plot(-Z, -intervals[:,0], linewidth=2, c='red')
    ax.plot(-Z, -intervals[:,1], linewidth=3, c='red')
R = -np.fabs(X).max(), np.fabs(X).max()
ax.plot([R[0],R[1]],[0,0], 'k--')
ax.plot([R[0],R[1]],[R[0],R[1]], 'k--')
ax.plot([-10,-10], [-100, 100], 'k--')
ax.plot([10,10], [-100, 100], 'k--')
ax.set_ylim([-22,22])
xlim = ax.get_xlim()



In [47]:
%%R -o X1
X1 = c(seq(10.14,10.3,.01),seq(10.4,11,.1),12:20)

In [48]:
one_sided = truncated_gaussian([(10,np.inf)])

In [49]:
one_intervals = []
for x in X1:
    one_intervals.append(UMAU_interval(x, 0.05, one_sided))
one_intervals = np.array(one_intervals, np.float)

In [50]:
f = plt.figure(figsize=(10,10))
ax = f.gca()
ax.plot(X1, np.array(one_intervals)[:,0], linewidth=2, c='red')
ax.plot(X1, np.array(one_intervals)[:,1], linewidth=2, c='red')

R = -np.fabs(X).max(), np.fabs(X).max()
ax.plot([R[0],R[1]],[0,0], 'k--')
ax.plot([R[0],R[1]],[R[0],R[1]], 'k--')
ax.plot([-10,-10], [-100, 100], 'k--')
ax.plot([10,10], [-100, 100], 'k--')
ax.set_ylim([-22,22])
ax.set_xlim(xlim)


Out[50]:
(-20.0, 20.0)