In [1]:
%run ~/relmapping/annot/notebooks/__init__.ipynb
In [2]:
for stage in config['stages']:
fp = 'atac814_geo/tracks/atac_%s.bw' % (stage,)
!md5sum {fp} | awk '{{print $$1}}'
In [3]:
# file checksum -- glp1, read1 only
for sample in list(config['atac814_se'].keys()):
fp = pf('atac814_' + sample, 'md5sum_r1', '.txt', 'atac814')
!cat {fp} | awk '{{print $$1}}'
In [4]:
# file checksum -- glp1, read1 only
for sample in list(config['atac814_pe'].keys()):
fp = pf('atac814_' + sample, 'md5sum_r1', '.txt', 'atac814')
!cat {fp} | awk '{{print $$1}}'
In [5]:
# file checksum -- glp1, read1 only
for sample in list(config['atac814_pe'].keys()):
fp = pf('atac814_' + sample, 'md5sum_r2', '.txt', 'atac814')
!cat {fp} | awk '{{print $$1}}'
In [6]:
# readlen -- glp1, read1 only
for sample in list(config['atac814_se'].keys()):
fp = pf('atac814_' + sample, 'readlen_r1', '.txt', 'atac814')
!cat {fp} #| awk '{{print $$1}}'
print()
In [7]:
# readlen -- wt, read1
for sample in list(config['atac814_pe'].keys()):
fp = pf('atac814_' + sample, 'readlen_r1', '.txt', 'atac814')
!cat {fp} #| awk '{{print $$1}}'
print()
In [8]:
# readlen -- wt, read2
for sample in list(config['atac814_pe'].keys()):
fp = pf('atac814_' + sample, 'readlen_r2', '.txt', 'atac814')
!cat {fp} #| awk '{{print $$1}}'
print()
In [9]:
# mean read length
# Count-weighted mean of the 'size' column for every sample under
# config['atac814_pe'], printed one value per line.
# NOTE(review): the input step name ends in ".fsizes", so these are presumably
# fragment sizes rather than read lengths -- confirm and rename if so.
for sample in config['atac814_pe'].keys():
    fp = pf('atac814_' + sample, 'tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm_blacklist.rm_q10.fsizes', '.txt', 'atac814')
    # Headerless tab-separated histogram: one row per size with its count.
    df = pd.read_csv(fp, sep='\t', names=['size', 'count'])
    # The weighted average works on the histogram directly, so there is no
    # need to expand it into one value per observation.
    mean1 = np.average(df['size'], weights=df['count'])
    # '%d' truncates the mean to its integer part.
    print('%d' % (mean1,))
In [10]:
# Population standard deviation (NumPy default, ddof=0) of the 'size' column,
# with each size repeated 'count' times, for every sample under
# config['atac814_pe']; printed one value per line.
for sample in config['atac814_pe'].keys():
    fp = pf('atac814_' + sample, 'tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm_blacklist.rm_q10.fsizes', '.txt', 'atac814')
    # Headerless tab-separated histogram: one row per size with its count.
    df = pd.read_csv(fp, sep='\t', names=['size', 'count'])
    # Expand the histogram to one entry per observation as a compact NumPy
    # array instead of a Python list of per-observation objects.
    # .values is deliberate: calling np.std on a pandas Series would defer to
    # Series.std, whose default is ddof=1, and change the result.
    sizes_expanded = np.repeat(df['size'].values, df['count'].values)
    # '%d' truncates the standard deviation to its integer part.
    print('%d' % (np.std(sizes_expanded),))
In [ ]: