In [22]:
%matplotlib inline
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from matplotlib import pylab as pl
import numpy as np
import pandas as pd
import seaborn as sns
import h5py
from matplotlib.backends.backend_pdf import PdfPages
from bx.bbi.bigwig_file import BigWigFile
import json
import os
import itertools
import numpy as np
import six
# import bcolz
# from pysam import FastaFile
# import roman as rm
import pandas as pd
import pdb
import numpy as np
import math

In [25]:
bigwigs = ['FIDDLE_alpha/data/bigwigs/tssseq_1_pos.bw',
           'FIDDLE_alpha/data/bigwigs/tssseq_1_neg.bw',
           'FIDDLE_alpha/data/bigwigs/tssseq_2_pos.bw',
           'FIDDLE_alpha/data/bigwigs/tssseq_2_neg.bw',
           'FIDDLE_alpha/data/bigwigs/tssseq_3_pos.bw',
           'FIDDLE_alpha/data/bigwigs/tssseq_3_neg.bw',
           'FIDDLE_alpha/data/bigwigs/netseq_pos.bw', 
           'FIDDLE_alpha/data/bigwigs/netseq_neg.bw',
           'FIDDLE_alpha/data/bigwigs/mnaseseq_pos.bw',
           'FIDDLE_alpha/data/bigwigs/mnaseseq_neg.bw',
           'FIDDLE_alpha/data/bigwigs/rnaseq_pos.bw',
           'FIDDLE_alpha/data/bigwigs/rnaseq_neg.bw',
           'FIDDLE_alpha/data/bigwigs/tfiib_pos.bw',
           'FIDDLE_alpha/data/bigwigs/tfiib_neg.bw',]

In [28]:
with open(u'/Users/marshall/Desktop/fiddle/FIDDLE_alpha/data/sacCer3.chrom.sizes', 'r') as f:
    chr_sizes = {line.split('\t')[0]: (1, int(line.split('\t')[-1].split('\n')[0])) for line in f.readlines()}

In [84]:
# results = summed up positions where NaNs exist
bw = [BigWigFile(open(bigwig, 'r')) for bigwig in bigwigs]
results = {None : None}

for chrom, length in six.iteritems(chr_sizes):
    results[chrom] = np.zeros(length[1])

for chrom, length in six.iteritems(chr_sizes):
    for i in range(len(bw)):
        extract_array = bw[i].get_as_array(chrom, 0, length[1])
        nan_bool_array = pd.isnull(extract_array)
        nan_int_array = 1 * nan_bool_array
        results[chrom] += nan_int_array

In [85]:
results


Out[85]:
{None: None,
 'chrI': array([  0.,   0.,   0., ...,  11.,  11.,  12.]),
 'chrII': array([  6.,   6.,   6., ...,  12.,  12.,  13.]),
 'chrIII': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrIV': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrIX': array([  6.,   6.,   6., ...,  11.,  12.,  13.]),
 'chrM': array([  6.,   6.,   6., ...,  13.,  13.,  13.]),
 'chrV': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrVI': array([  6.,   6.,   6., ...,  11.,  11.,  11.]),
 'chrVII': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrVIII': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrX': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrXI': array([  6.,   6.,   6., ...,  11.,  12.,  12.]),
 'chrXII': array([  6.,   6.,   6., ...,  14.,  14.,  14.]),
 'chrXIII': array([  6.,   6.,   6., ...,  12.,  12.,  13.]),
 'chrXIV': array([  6.,   6.,   6., ...,  12.,  12.,  12.]),
 'chrXV': array([  6.,   6.,   6., ...,  11.,  11.,  12.]),
 'chrXVI': array([  6.,   6.,   6., ...,  14.,  14.,  14.])}

In [34]:
inter1 = np.isnan(data)
result = inter1.astype(int)
print(result[:, 11400:11500])
finalR = np.sum(result, axis = 0)
print(finalR[11400:11500])
finalR.shape
# pl.plot(result)

In [29]:
results = np.empty((data.shape[0], 2)) # isNan = 0, notNan = 1
results.shape
for i in range(data.shape[0]):
    for j in range(data.shape[1]):
        if np.isnan(data[i,j]):
            results[i, 0] += 1
        else:
            results[i, 1] += 1


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-29-960126a5adbd> in <module>()
      2 results.shape
      3 for i in range(data.shape[0]):
----> 4     for j in range(data.shape[1]):
      5         if np.isnan(data[i,j]):
      6             results[i, 0] += 1

KeyboardInterrupt: 

In [ ]:
np.divide(results, 813184, results)
np.multiply(results, 100, results)
np.set_printoptions(precision=2)
print(results) # NaN composition as percentage for .bw files

In [ ]: