In [1]:
# show all warnings
import warnings as _wrn
_wrn.filterwarnings('always')
In [134]:
# imports
import sys
import pickle as pkl
import numpy as nmp
import numpy.random as rnd
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
%matplotlib inline
plt.style.use('ggplot')
In [3]:
# import eQTLseq
sys.path.append('../')
import eQTLseq as seq
In [144]:
## load data
with open('../data/simdata_1_8_0_0.pkl', 'rb') as fh:
data = pkl.load(fh)
Z = data['Z'] # simulated expression data
G = data['G'] # simulated genotypes
B = data['beta'] # the true matrix of gene/variant associations
## print/plot info
print(Z.shape) # samples x genes
print(G.shape) # samples x genetic markers
print(B.shape) # genes x genetic markers
plt.figure();
plt.imshow(B, cmap=plt.cm.Greys_r);
plt.xlabel('genetic markers');
plt.ylabel('genes');
# plt.savefig('../figs/fig1.png')
In [145]:
## Estimate model
Z_norm = Z / seq.calculate_norm_factors(Z) # normalize data
Z_trans = seq.transform_data(Z_norm, kind='log') # transform data
rnd.seed(0)
res = seq.run(Z_trans.T, G, n_iters = 4000, burnin=0.5, model='Normal', n_threads=1, scaleG=True)
In [146]:
## normalize beta
Bhat = res['beta']
Bhat = Bhat / nmp.abs(Bhat).sum()
Bnorm = B / nmp.abs(B).sum()
## plot
plt.figure(figsize=(15,10))
plt.subplot(2,1,1);
plt.plot(res['state'][1:]); plt.xlabel('iteration'); plt.ylabel('state')
plt.subplot(2,1,2);
plt.vlines(range(Bnorm.size), 0, Bnorm.ravel());
plt.axhline(linestyle='--', color='k');
plt.plot(Bhat.ravel(), 'r.'); plt.xlabel('markers x genes'); plt.ylabel('effect size');
## compute metrics
metrics = seq.calculate_metrics(Bhat, Bnorm, beta_thr=0.25)
print([metrics[_] for _ in ('MCC', 'FDR')])
plt.tight_layout()
plt.savefig('../figs/fig3.png')
In [46]:
whos
In [ ]: