In [1]:
import pickle as _pkl
import numpy as _nmp
import pandas as _pnd
import matplotlib.pyplot as _plt
%matplotlib inline
In [2]:
mdls = {
'logit': 'Normal.logit',
'arcsin': 'Normal.arcsin',
'log': 'Normal.log',
'bcox': 'Normal.boxcox',
'blom': 'Normal.blom',
'voom': 'Normal.voom',
'vst': 'Normal.vst',
'bin': 'Binomial.none',
'nbin': 'NBinomial.none',
'pois': 'Poisson.none'
}
In [7]:
## load results
def load_results():
##
res, hits = {}, {}
fin_dir = '/home/dimitris/WTCHG/Projects/eQTLseq/results/geuvadis/raw'
for key in mdls:
fin = '{}/TF.common.HIGH.{}.452.{}.pkl'.format(fin_dir, 'protein_coding', mdls[key])
print(fin)
with open(fin, 'rb') as fh:
res[key] = _pkl.load(fh)
beta = res[key]['beta']
thr = 0.25 #if key in ('bin', 'nbin', 'pois') else 1e-6
idxs = _nmp.abs(beta) > thr * _nmp.max(_nmp.abs(beta))
beta[~idxs] = 0
hits[key] = idxs
##
return res, hits
##
res, hits = load_results()
In [8]:
## plot
mdl = 'logit'
_plt.figure(figsize = (15,10));
_plt.subplot(4,1,1); _plt.plot(res[mdl]['state'][10:]); _plt.xlabel('iteration'); _plt.ylabel('state')
_plt.subplot(4,1,2); _plt.vlines(range(res[mdl]['beta'].size), 0, res[mdl]['beta']); _plt.xlabel('markers x genes'); _plt.ylabel('effect size')
_plt.axhline(linestyle='--', color='k');
_plt.subplot(4,1,3); _plt.vlines(range(res[mdl]['beta_var'].size), 0, res[mdl]['beta_var']); _plt.xlabel('markers x genes'); _plt.ylabel('var')
_plt.axhline(linestyle='--', color='k');
In [9]:
## find interesection
hits_common = (
hits['arcsin'] &
hits['logit'] &
hits['log'] &
hits['blom'] &
hits['bcox'] &
hits['voom'] &
hits['vst'] &
hits['pois'] &
hits['bin'] &
hits['nbin']
)
hits_common.nonzero()
Out[9]:
In [10]:
## save data
def save_data():
##
fout_dir = '/home/dimitris/WTCHG/Projects/eQTLseq/results/geuvadis/hits'
for key in mdls:
fout = '{}/TF.common.HIGH.{}.452.{}.hits.txt'.format(fout_dir, 'protein_coding', mdls[key])
print(fout)
_nmp.savetxt(fout, hits[key], fmt='%d')
##
save_data()
In [ ]:
# load data
def load_data():
data = {
'G': _pnd.read_table('/home/dimitris/WTCHG/Projects/eQTLseq/data/geuvadis/genotypes.TF.common.HIGH.no_missing.txt', index_col=0, header=None),
'Z': _pnd.read_table('/home/dimitris/WTCHG/Projects/eQTLseq/data/geuvadis/counts_miRNA.txt', index_col=0)
}
samples = data['G'].index & data['Z'].columns
data['Z'] = data['Z'][samples].values
data['G'] = data['G'].loc[samples].values
##
return data
##
data = load_data()
In [ ]:
## add info
def modify_data():
Z, G = data['Z'], data['G']
idxs_genes = Z.mean(1) > 10 # keep only expressed tags
idxs_markers = _nmp.std(G, 0) > 0 # keep only non-monomorphic loci
data_mod = {
'Z': Z,
'G': G,
'idxs_genes': idxs_genes,
'idxs_markers': idxs_markers
}
with open('/home/dimitris/WTCHG/Projects/eQTLseq/data/geuvadis/TF.common.HIGH.tfs.452.pkl', 'wb') as fh:
_pkl.dump(data_mod, fh)
##
modify_data()
In [ ]:
res['log']['beta_var']
In [ ]: