In [1]:
import pandas as pd
import os
import sys
import pybedtools

In [26]:
pybedtools.cleanup(remove_all=True)

In [27]:
chromsizes = {'NC_000913.3': (0, 4641651)}
arcz = pybedtools.BedTool('../results/arcz.fixed.bed')
dsra = pybedtools.BedTool('../results/dsra.fixed.bed')
rpra = pybedtools.BedTool('../results/rpra.fixed.bed')
utr5 = pybedtools.BedTool('../results/redux/wt_tmut.utrs.corr.old.bed')

In [28]:
utr5_arcz = utr5.intersect(arcz, wa=True, wb=True)

In [29]:
utr5_arcz.head()


NC_000913.3	83708	84024	-	leuL	NC_000913.3	83823	83841	b0075
 NC_000913.3	103561	103982	+	ftsA	NC_000913.3	103761	103766	b0093
 NC_000913.3	310671	310797	-	ecpA	NC_000913.3	310677	310683	b0293
 NC_000913.3	320001	320215	-	ykgC	NC_000913.3	320209	320232	b0305
 NC_000913.3	346337	346435	-	yahN	NC_000913.3	346344	346359	b0329
 NC_000913.3	346337	346436	-	yahN	NC_000913.3	346344	346359	b0329
 NC_000913.3	475245	475379	+	ybaY	NC_000913.3	475246	475272	b0453
 NC_000913.3	540359	540565	+	ybbY	NC_000913.3	540465	540482	b0513
 NC_000913.3	742619	742827	+	ybgI	NC_000913.3	742693	742698	b0710
 NC_000913.3	755404	755560	+	sdhD	NC_000913.3	755515	755536	b0721
 

In [30]:
utr5_arcz.saveas('../results/redux/utr5_arcz.bed')


Out[30]:
<BedTool(../../results/redux/utr5_arcz.bed)>

In [31]:
arcz_genes = set()
with open('../../results/redux/utr5_arcz.bed') as fi:
    for line in fi:
        fields = line.strip().split('\t')
        arcz_genes.update(fields[4])
        
len(arcz_genes)


Out[31]:
46

In [53]:
arcz.set_chromsizes(chromsizes)
result = arcz.randomstats(utr5,
                          iterations=10000,
                          processes=30,
                          shuffle_kwargs={'chrom': True})

In [54]:
result


Out[54]:
{'../../results/arcz.fixed.bed': 4318,
 '../../results/redux/wt_tmut.utrs.corr.old.bed': 499,
 'actual': 74,
 'file_a': '../../results/arcz.fixed.bed',
 'file_b': '../../results/redux/wt_tmut.utrs.corr.old.bed',
 'frac randomized above actual': 0.0161,
 'frac randomized below actual': 0.97770000000000001,
 'iterations': 10000,
 'lower_2.5th': 44.0,
 'median randomized': 58.0,
 'normalized': 1.2758620689655173,
 'other': 499,
 'percentile': 98.084999999999994,
 'self': 4318,
 'upper_97.5th': 73.0}

In [57]:
dsra.set_chromsizes(chromsizes)
result_dsra = dsra.randomstats(utr5,
                          iterations=10000,
                          processes=30,
                          shuffle_kwargs={'chrom': True})

In [58]:
result_dsra


Out[58]:
{'../../results/dsra.fixed.bed': 4318,
 '../../results/redux/wt_tmut.utrs.corr.old.bed': 499,
 'actual': 88,
 'file_a': '../../results/dsra.fixed.bed',
 'file_b': '../../results/redux/wt_tmut.utrs.corr.old.bed',
 'frac randomized above actual': 0.0001,
 'frac randomized below actual': 0.99960000000000004,
 'iterations': 10000,
 'lower_2.5th': 43.0,
 'median randomized': 58.0,
 'normalized': 1.5172413793103448,
 'other': 499,
 'percentile': 99.980000000000004,
 'self': 4318,
 'upper_97.5th': 73.0}

In [59]:
rpra.set_chromsizes(chromsizes)
result_rpra = rpra.randomstats(utr5,
                          iterations=10000,
                          processes=30,
                          shuffle_kwargs={'chrom': True})

In [60]:
result_rpra


Out[60]:
{'../../results/redux/wt_tmut.utrs.corr.old.bed': 499,
 '../../results/rpra.fixed.bed': 4318,
 'actual': 67,
 'file_a': '../../results/rpra.fixed.bed',
 'file_b': '../../results/redux/wt_tmut.utrs.corr.old.bed',
 'frac randomized above actual': 0.12139999999999999,
 'frac randomized below actual': 0.85060000000000002,
 'iterations': 10000,
 'lower_2.5th': 44.0,
 'median randomized': 58.0,
 'normalized': 1.1551724137931034,
 'other': 499,
 'percentile': 86.465000000000003,
 'self': 4318,
 'upper_97.5th': 74.0}

In [ ]:
#### pybedtools.cleanup(remove_all=True)