In [1]:
import numpy as np
from selection.truncated import *
import nose.tools as nt
%load_ext rmagic
%pylab inline
This is my attempt to port Will's UMPU to python. First I will see if I can just run it from python:
In [2]:
%%R
### Construct UMAU intervals for a truncated Gaussian
## Simple functions for specifying and checking a "cutoffs" object, which
## is just a matrix, each of whose rows is a non-empty real interval (infinite endpoints allowed)
check.cutoffs <- function(cutoffs) {
if(!is.matrix(cutoffs) || dim(cutoffs)[2] != 2) stop("cutoffs should be a matrix with 2 columns")
if(sum(cutoffs[,2] <= cutoffs[,1]) > 0) stop("all right endpoints should be > left endpoints")
if(sum(diff(c(t(cutoffs))) <= 0) > 0) stop("endpoints should be strictly increasing")
}
In [3]:
%%R
negate.cutoffs <- function(cutoffs) {
-cutoffs[nrow(cutoffs):1,2:1,drop=FALSE]
}
negate.cutoffs(rbind(c(-Inf,-4),c(-3,-2),c(7,Inf)))
In [4]:
tg = truncated_gaussian([(-np.inf,-4), (-3,-2), (7, np.inf)])
tg.negated.intervals
Out[4]:
In [5]:
%%R
two.sided.cutoff <- function(x) rbind(neg=c(-Inf,-abs(x)),pos=c(abs(x),Inf))
two.sided.cutoff(3)
In [6]:
three = truncated_gaussian.twosided(3)
three.intervals
Out[6]:
In [7]:
%%R
## Compute Phi(b-mu) - Phi(a-mu) in a numerically robust way
## mu can be a vector
pnorm.interval <- function(mu, ab) {
ifelse(mean(ab) - mu < 0,
pnorm(ab[2] - mu) - pnorm(ab[1] - mu),
pnorm(mu - ab[1]) - pnorm(mu - ab[2]))
}
## Compute Phi(b-mu) - Phi(a-mu) for each [a,b] in S
pnorm.cutoffs <- function(mu, cutoffs) {
ret <- apply(cutoffs, 1, function(cut) pnorm.interval(mu, cut))
if(!is.matrix(ret)) ret <- t(ret)
dimnames(ret) <- list(as.character(mu),row.names(cutoffs))
ret
}
print(pnorm.cutoffs(-1:1,two.sided.cutoff(3)))
print(pnorm.cutoffs(-1,two.sided.cutoff(3)))
In [8]:
for mu in [-1,0,1]:
three.mu = mu
print np.array(three.P, np.float128)
In [9]:
%%R
## Compute phi(b-mu) - phi(a-mu) for each [a,b] in S
## mu can be a vector
dnorm.cutoffs <- function(mu, cutoffs) {
ret <- apply(cutoffs, 1, function(cut) dnorm(cut[2] - mu) - dnorm(cut[1] - mu))
if(!is.matrix(ret)) ret <- t(ret)
dimnames(ret) <- list(as.character(mu),row.names(cutoffs))
ret
}
print(dnorm.cutoffs(-1:1,two.sided.cutoff(3)))
print(dnorm.cutoffs(-1,two.sided.cutoff(3)))
In [10]:
for mu in [-1,0,1]:
three.mu = mu
print np.array(three.D[:,1] - three.D[:,0], np.float128)
In [11]:
%%R
## Compute P_mu(X<x)
## mu CANNOT be a vector, pk is a one-row matrix or vector
## x must be in one of the intervals
F.mu <- function(x, mu, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk))
K <- length(pk)
p <- sum(pk)
k <- which(x >= cutoffs[,1] & x <= cutoffs[,2])
stopifnot(length(k)==1)
(sum(pk[(1:K) < k]) + pnorm.interval(mu, c(cutoffs[k,1], x)) ) / p
}
F.mu(-10.01,0,two.sided.cutoff(10))
In [12]:
%%R
F.mu(-9,0,two.sided.cutoff(10))
In [13]:
%%R
F.mu(10.2,0,two.sided.cutoff(10))
In [14]:
ten = truncated_gaussian.twosided(10)
print np.array(ten.CDF(-10.01), np.float128)
In [15]:
nt.assert_raises(ValueError, ten.CDF, (-9,))
In [16]:
print np.array(ten.CDF(10.2), np.float128)
In [17]:
%%R
## Compute the inverse of the previous function
## mu CANNOT be a vector, pk is a one-row matrix or vector
F.inv.mu <- function(F, mu, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk))
p <- sum(pk)
k <- max(which(c(0,cumsum(pk))/p < F))
pnorm.increment <- p*F - c(0,cumsum(pk))[k]
if(mean(cutoffs[k,]) < 0)
mu + qnorm(pnorm(cutoffs[k,1]-mu) + pnorm.increment)
else
mu + qnorm(pnorm(cutoffs[k,1]-mu,lower.tail=FALSE) - pnorm.increment,lower.tail=FALSE)
}
## Compute c2(c1) for a single c1
## mu CANNOT be a vector, pk is a one-row matrix or vector
c2.single <- function(c1, mu, alpha, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk))
K <- length(pk)
alpha1 <- F.mu(c1, mu, cutoffs, pk)
if(alpha1 > alpha) return(NA)
alpha2 <- alpha-alpha1
return(F.inv.mu(1-alpha2, mu, cutoffs, pk))
}
c2.single(-10.3, 0, .05, two.sided.cutoff(10))
F.mu(-10.3, 0, two.sided.cutoff(10))
## Do the same, for a vector of c1 and mu
c2 <- function(c1, mu, alpha, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) {
sapply(1:length(c1),function(i)
c2.single(c1[i], mu[i], alpha, cutoffs,
pk[i,,drop=FALSE]))
}
c2(-10.3, 0, .05, two.sided.cutoff(10))
In [18]:
ten.mu = 0
print np.array(ten.right_endpoint(-10.3, 0.05), np.float128)
In [19]:
%%R
## Compute g_mu(c1) for a single mu and c1 (see LaTeX documentation)
## c1 and mu CANNOT be vectors, pk is NOT a matrix
g.mu.single <- function(c1, mu, alpha, cutoffs,
pk=pnorm.cutoffs(mu, cutoffs), dk=dnorm.cutoffs(mu, cutoffs)) {
const <- (1-alpha) * (sum(-dk) + mu * sum(pk))
cc2 <- c2(c1, mu, alpha, cutoffs, pk)
if(is.na(cc2)) return(Inf)
K <- length(pk)
p <- sum(pk)
k1 <- which(c1 >= cutoffs[,1] & c1 <= cutoffs[,2])
stopifnot(length(k1)==1)
k2 <- which(cc2 >= cutoffs[,1] & cc2 <= cutoffs[,2])
stopifnot(length(k2)==1)
if(k1 < k2) {
sum(-dk[(1:K) > k1 & (1:K) < k2]) + mu * sum(pk[(1:K) > k1 & (1:K) < k2]) +
- dnorm(cutoffs[k1,2] - mu) + dnorm(c1 - mu) - dnorm(cc2 - mu) + dnorm(cutoffs[k2,1] - mu) +
mu * (pnorm.interval(mu,c(c1,cutoffs[k1,2])) + pnorm.interval(mu,c(cutoffs[k2,1], cc2))) -
const
} else {
- dnorm(cc2 - mu) + dnorm(c1 - mu) + mu * pnorm.interval(mu,c(c1,cc2)) - const
}
}
## Compute g_mu(c1) for a vector of mu and c1 (see LaTeX documentation)
g.mu <- function(c1, mu, alpha, cutoffs,
pk=pnorm.cutoffs(mu, cutoffs), dk=dnorm.cutoffs(mu, cutoffs)) {
sapply(1:length(c1),function(i)
g.mu.single(c1[i], mu[i], alpha, cutoffs,
pk[i,,drop=FALSE], dk[i,,drop=FALSE]))
}
g.mu(-10.3,1,0.05, two.sided.cutoff(10))
In [20]:
ten.mu = 1
print np.array(ten.G(-10.3, 0.05), np.float128)
In [21]:
%%R
dnorm.cutoffs(c(0,0),two.sided.cutoff(10))
c.vals <- seq(-10.3,-10.28,.001)
plot(c.vals,g.mu(c.vals,rep(0,length(c.vals)), .05, two.sided.cutoff(10)))
g.mu(c.vals,rep(0,length(c.vals)), .05, two.sided.cutoff(10))
In [22]:
right_endpoints = np.arange(-10.3,-10.28,0.001)
mus = np.zeros_like(right_endpoints)
Gvals = G(right_endpoints, mus, 0.05, ten)
f = plt.figure(figsize=(8,8))
ax = f.gca()
ax.plot(right_endpoints, Gvals)
np.array(Gvals)
Out[22]:
In [23]:
%%R
g.mu(-10.2925, 0, .05, two.sided.cutoff(10))
In [24]:
ten.mu = 0
print np.array(ten.G(-10.2925,0.05), np.float128)
In [25]:
%%R
## Compute g_mu'(c1)
dg.mu <- function(c1, mu, alpha, cutoffs,
pk=pnorm.cutoffs(mu, cutoffs)) {
(c2(c1, mu, alpha, cutoffs, pk) - c1) * dnorm(c1 - mu)
}
In [26]:
approxG = G(right_endpoints-0.001, mus, 0.05, ten) + dG(right_endpoints-0.001, mus, 0.05, ten) * 0.001
ax.scatter(right_endpoints, approxG)
ax.figure
Out[26]:
In [27]:
%%R
mu.vals <- seq(-10,15,.1)
plot(mu.vals, g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10)))
g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10))
g.mu(-10.2925,2, .05, two.sided.cutoff(10))
In [28]:
f = plt.figure(figsize=(8,8))
mus = np.arange(-10,15,0.1)
Gvals = G(np.ones_like(mus)*(-10.2925), mus, 0.05, ten)
plt.plot(mus, Gvals, linewidth=2, color='red')
Out[28]:
In [29]:
%%R
dg.mu(-10.2925, 8, .05, two.sided.cutoff(10))
In [30]:
ten.mu = 8
print np.array(ten.dG(-10.2925, 0.05), np.float128)
In [31]:
%%R
mu.vals <- seq(-.001,.001,.0001)
plot(mu.vals, g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10)))
In [32]:
mus = np.arange(-0.001,0.001,0.0001)
Gvals = G(-10.2925*np.ones_like(mus), mus, 0.05, ten)
f = plt.figure(figsize=(8,8))
ax = f.gca()
ax.plot(mus, Gvals, linewidth=2, color='red')
Out[32]:
In [33]:
%%R
## Compute upper CI endpoint, for a single x, when sigma=1
umau.normal.unit.var.upper.single <- function(x, cutoffs, alpha=.05, mu.lo=x, mu.hi=x+2, tol=1E-8) {
check.cutoffs(cutoffs)
mu.too.low <- function(mu) {
g.mu(x,mu,alpha,cutoffs) > 0
}
mu.too.high <- function(mu) {
g.mu(x,mu,alpha,cutoffs) < 0
}
while(mu.too.high(mu.lo)) {
mu.hi <- mu.lo
mu.lo <- mu.lo - 2
}
while(mu.too.low(mu.hi)) {
mu.lo <- mu.hi
mu.hi <- mu.hi + 2
}
while(mu.hi - mu.lo > tol) {
mu.avg <- (mu.lo + mu.hi) / 2
if(mu.too.high(mu.avg)) {
mu.hi <- mu.avg
} else {
mu.lo <- mu.avg
}
}
mu.avg
}
print(umau.normal.unit.var.upper.single(-10.29, two.sided.cutoff(10), mu.lo=-1, mu.hi=5))
print(umau.normal.unit.var.upper.single(-10.2925, two.sided.cutoff(10), mu.lo=-1, mu.hi=5))
In [34]:
L, U = UMAU_interval(-10.29, 0.05, ten)
ten.mu = U
print U, ten.G(-10.29, 0.05)
ten_ng = ten.negated
ten_ng.mu = -L
print L, ten_ng.G(10.29, 0.05)
In [35]:
%%R
## Compute both CI endpoints, for a single x
umau.normal.single <- function(x, cutoffs, sigma=1, alpha=.05, mu.lo=x, mu.hi=x+2, tol=1E-8) {
mu.upper <- sigma * umau.normal.unit.var.upper.single(x/sigma, cutoffs/sigma, alpha,
mu.lo/sigma, mu.hi/sigma, tol)
mu.lower <- -sigma * umau.normal.unit.var.upper.single(-x/sigma, negate.cutoffs(cutoffs)/sigma, alpha,
-mu.hi/sigma, -mu.lo/sigma, tol)
return(c(mu.lower, mu.upper))
}
LU = umau.normal.single(10.29, two.sided.cutoff(10))
print(umau.normal.single(1+10.29, 1+two.sided.cutoff(10)))
print(umau.normal.single(-10.29, two.sided.cutoff(10)))
print(umau.normal.single(1-10.29, 1+two.sided.cutoff(10)))
In [36]:
%%R
# check the value of g.mu
LU = umau.normal.single(-10.29, two.sided.cutoff(10))
print(g.mu(10.29,-LU[1],0.05, negate.cutoffs(two.sided.cutoff(10))))
print(g.mu(-10.29,LU[2],0.05, two.sided.cutoff(10)))
In [37]:
ten_shift = truncated_gaussian(ten.intervals+1)
L, U = UMAU_interval(1-10.29, 0.05, ten_shift)
print L, U
In [38]:
ten_shift = truncated_gaussian(ten.intervals-1)
L, U = UMAU_interval(-1-10.29, 0.05, ten_shift)
print L, U
In [39]:
ten_shift = truncated_gaussian(ten.intervals)
L, U = UMAU_interval(10.29, 0.05, ten_shift)
print L, U
In [40]:
%%R -o X
X <- c(seq(-20,-14,1),seq(-13,-10.4,.1),seq(-10.3,-10.06,.02),seq(-10.05,-10.01,.01),seq(-10.01,-10,0.02))
In [41]:
intervals = []
for x in X:
intervals.append(UMAU_interval(x, 0.05, ten))
intervals = np.array(intervals, np.float)
In [42]:
f = plt.figure(figsize=(10,10))
ax = f.gca()
ax.plot(X, np.array(intervals)[:,0], linewidth=2, c='red')
ax.plot(X, np.array(intervals)[:,1], linewidth=2, c='red')
ax.plot(-X, -np.array(intervals)[:,0], linewidth=2, c='red')
ax.plot(-X, -np.array(intervals)[:,1], linewidth=2, c='red')
R = -np.fabs(X).max(), np.fabs(X).max()
ax.plot([R[0],R[1]],[0,0], 'k--')
ax.plot([R[0],R[1]],[R[0],R[1]], 'k--')
ax.plot([-10,-10], [-100, 100], 'k--')
ax.plot([10,10], [-100, 100], 'k--')
ax.set_ylim([-22,22])
Out[42]:
In [43]:
ten_mid = truncated_gaussian([(-np.inf,-10),(-.1,0.1),(10,np.inf)])
In [44]:
%%R -o X2
X2 = seq(-0.1,0.,length=5)
In [45]:
mid_intervals = []
for x in X:
mid_intervals.append(UMAU_interval(x, 0.05, ten_mid))
mid_intervals = np.array(mid_intervals, np.float)
mid_intervals2 = []
for x in X2:
mid_intervals2.append(UMAU_interval(x, 0.05, ten_mid))
mid_intervals2 = np.array(mid_intervals2, np.float)
In [46]:
f = plt.figure(figsize=(10,10))
ax = f.gca()
for Z, intervals in zip([X,X2],[mid_intervals, mid_intervals2]):
ax.plot(Z, intervals[:,0], linewidth=2, c='red')
ax.plot(Z, intervals[:,1], linewidth=2, c='red')
ax.plot(-Z, -intervals[:,0], linewidth=2, c='red')
ax.plot(-Z, -intervals[:,1], linewidth=3, c='red')
R = -np.fabs(X).max(), np.fabs(X).max()
ax.plot([R[0],R[1]],[0,0], 'k--')
ax.plot([R[0],R[1]],[R[0],R[1]], 'k--')
ax.plot([-10,-10], [-100, 100], 'k--')
ax.plot([10,10], [-100, 100], 'k--')
ax.set_ylim([-22,22])
xlim = ax.get_xlim()
In [47]:
%%R -o X1
X1 = c(seq(10.14,10.3,.01),seq(10.4,11,.1),12:20)
In [48]:
one_sided = truncated_gaussian([(10,np.inf)])
In [49]:
one_intervals = []
for x in X1:
one_intervals.append(UMAU_interval(x, 0.05, one_sided))
one_intervals = np.array(one_intervals, np.float)
In [50]:
f = plt.figure(figsize=(10,10))
ax = f.gca()
ax.plot(X1, np.array(one_intervals)[:,0], linewidth=2, c='red')
ax.plot(X1, np.array(one_intervals)[:,1], linewidth=2, c='red')
R = -np.fabs(X).max(), np.fabs(X).max()
ax.plot([R[0],R[1]],[0,0], 'k--')
ax.plot([R[0],R[1]],[R[0],R[1]], 'k--')
ax.plot([-10,-10], [-100, 100], 'k--')
ax.plot([10,10], [-100, 100], 'k--')
ax.set_ylim([-22,22])
ax.set_xlim(xlim)
Out[50]: