In [22]:
%matplotlib inline
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from matplotlib import pylab as pl
import numpy as np
import pandas as pd
import seaborn as sns
import h5py
from matplotlib.backends.backend_pdf import PdfPages
from bx.bbi.bigwig_file import BigWigFile
import json
import os
import itertools
import numpy as np
import six
# import bcolz
# from pysam import FastaFile
# import roman as rm
import pandas as pd
import pdb
import numpy as np
import math
In [25]:
# Paths of the bigWig tracks to inspect. Every track name contributes a
# "<name>_pos.bw" file followed by a "<name>_neg.bw" file (presumably the two
# strands -- confirm), all under the same relative directory.
_bigwig_dir = 'FIDDLE_alpha/data/bigwigs/'
_track_names = (
    'tssseq_1',
    'tssseq_2',
    'tssseq_3',
    'netseq',
    'mnaseseq',
    'rnaseq',
    'tfiib',
)
bigwigs = list(
    _bigwig_dir + _name + '_' + _suffix + '.bw'
    for _name in _track_names
    for _suffix in ('pos', 'neg')
)
In [28]:
# Build chr_sizes = {chromosome name: (1, chromosome length)} from a
# tab-separated chrom.sizes file: the first field of each line is used as the
# name and the last field as the integer length.
# NOTE(review): this is an absolute, machine-specific path, while the bigWig
# paths in this notebook are relative ('FIDDLE_alpha/data/...'); consider
# making this one relative as well.
chr_sizes = {}
with open(u'/Users/marshall/Desktop/fiddle/FIDDLE_alpha/data/sacCer3.chrom.sizes', 'r') as f:
    for line in f:
        # Skip blank lines (e.g. a trailing empty line); they would otherwise
        # reach int('') and raise ValueError.
        if not line.strip():
            continue
        fields = line.rstrip('\r\n').split('\t')
        chr_sizes[fields[0]] = (1, int(fields[-1]))
In [84]:
# results[chrom] is a float array with one entry per position of `chrom`;
# each entry counts how many of the bigWig tracks report NaN at that position.
#
# bigWig is a binary format, so the files are opened in 'rb' mode.
# NOTE(review): the underlying file handles are never closed; close them once
# `bw` is no longer needed.
bw = [BigWigFile(open(bigwig, 'rb')) for bigwig in bigwigs]

results = {}
for chrom, length in six.iteritems(chr_sizes):
    # chr_sizes values are (1, chromosome_length) tuples.
    chrom_length = length[1]
    nan_counts = np.zeros(chrom_length)
    for track in bw:
        extract_array = track.get_as_array(chrom, 0, chrom_length)
        # NOTE(review): if get_as_array returns a non-array value (e.g. None)
        # for a chromosome, pd.isnull yields a scalar and every position of
        # that chromosome is incremented -- confirm this is intended.
        nan_counts += pd.isnull(extract_array)
    results[chrom] = nan_counts
In [85]:
# Show `results` as this cell's output.
results
Out[85]:
In [34]:
# NaN indicator matrix for `data` (1 where the value is NaN, 0 otherwise) and
# its column-wise totals, with a 100-column window printed for inspection.
# NOTE(review): `data` is not defined in any cell shown here; the indexing
# below requires it to be at least 2-D -- confirm where it is loaded.
inter1 = np.isnan(data)
result = inter1.astype(int)
window = slice(11400, 11500)
print(result[:, window])

# Summing over axis 0 collapses the rows, leaving one NaN count per column.
finalR = result.sum(axis=0)
print(finalR[window])
finalR.shape
# pl.plot(result)
In [29]:
# Per-row NaN tally of `data`:
#   results[i, 0] = number of NaN entries in row i
#   results[i, 1] = number of non-NaN entries in row i
#
# The array must start from zeros: np.empty returns uninitialized memory, so
# accumulating into it yields arbitrary counts. The counts are computed
# vectorized instead of with a Python loop over every element.
# NOTE(review): `data` is not defined in any cell shown here; it is indexed as
# data[i, j], so it is assumed to be a 2-D numeric array -- confirm.
nan_mask = np.isnan(data)
results = np.zeros((data.shape[0], 2))
results[:, 0] = nan_mask.sum(axis=1)
results[:, 1] = data.shape[1] - results[:, 0]
In [ ]:
# Convert the per-row counts in `results` to percentages.
#
# The result goes into a new array instead of overwriting `results` in place,
# so re-running this cell does not rescale already-scaled values.
# NOTE(review): 813184 is presumably the number of columns in `data`
# (data.shape[1]) -- confirm and derive it from the data if so.
N_POSITIONS = 813184
nan_percent = results / N_POSITIONS * 100.0

np.set_printoptions(precision=2)
print(nan_percent)  # NaN composition as percentage for .bw files
In [ ]: