In [3]:
# set some ipython notebook properties
%matplotlib inline
# set degree of verbosity (adapt to INFO for more verbose output)
import logging
logging.basicConfig(level=logging.WARNING)
# set figure sizes
import pylab
pylab.rcParams['figure.figsize'] = (10.0, 8.0)
# set display width for pandas data frames
import pandas as pd
pd.set_option('display.width', 1000)
import numpy as np
from fastlmm.pyplink.snpreader.Bed import Bed
from fastlmm.association.PrecomputeLocoPcs import load_intersect
from fastlmm.inference.lmm_cov import LMM as fastLMM
bed_fn = "../../tests/datasets/mouse/alldata"
pheno_fn = "../../tests/datasets/mouse/pheno_10_causals.txt"
snp_reader = Bed(bed_fn)
G, y, _, _ = load_intersect(snp_reader, pheno_fn)
num = 20
h2_log_delta = [1.0 / (np.exp(x) + 1) for x in np.linspace(-5, 10, num)]
h2_linspace = np.linspace(0,0.999,num=num)
lmm = fastLMM(Y=y, G=G, K=None)
nll_log_delta = []
for h2 in h2_log_delta:
res = lmm.nLLeval(h2=h2)
nll_log_delta.append(res["nLL"])
nll_linspace = []
for h2 in h2_linspace:
res = lmm.nLLeval(h2=h2)
nll_linspace.append(res["nLL"])
# scale kernel for comparison
K = G.dot(G.T)
K /= np.sum(np.diag(K))
K *= K.shape[0]
lmm = fastLMM(Y=y, K=K)
nll_h2_norm = []
for h2 in h2_linspace:
res = lmm.nLLeval(h2=h2)
nll_h2_norm.append(res["nLL"])
print "min nll log-delta", min(nll_log_delta)
print "min nll h2", min(nll_linspace)
print "min nll h2 (norm)", min(nll_h2_norm)
import pylab
pylab.semilogx(h2_log_delta, nll_log_delta, "-x", label="log_delta")
pylab.semilogx(h2_linspace, nll_linspace, "-o", label="h2")
pylab.semilogx(h2_linspace, nll_h2_norm, "-x", label="h2_norm")
pylab.title("log space x")
pylab.legend(loc="best")
pylab.show()
pylab.plot(h2_log_delta, nll_log_delta, "-x", label="log_delta")
pylab.plot(h2_linspace, nll_linspace, "-o", label="h2")
pylab.plot(h2_linspace, nll_h2_norm, "-x", label="h2_norm")
pylab.title("lin space x")
pylab.legend(loc="best")
pylab.show()
Conclusion: once the kernel has been rescaled to unit mean diagonal, there does not seem to be an advantage in optimizing in log-delta space over optimizing h2 directly on a linear grid.