In [1]:
import pandas as pd
import pybedtools
import os
import sys
# Put the parent directory on the import path so the `utils` package
# imported right after this can be resolved from the notebook's directory.
utils_path = os.path.abspath('..')
if utils_path not in sys.path:
    # Guarded so that re-running the cell never appends a duplicate entry.
    sys.path.append(utils_path)
from utils.rpos import *
In [2]:
# Clear pybedtools temporary files before the analysis starts.
# NOTE(review): `remove_all=True` presumably also deletes leftovers from
# earlier sessions, not just this one — confirm against the pybedtools docs.
pybedtools.cleanup(remove_all=True)
In [3]:
# Sequence name -> coordinate pair, presumably the (start, end) extent of the
# reference sequence — TODO confirm.
# NOTE(review): not referenced by any other cell visible here.
chromsizes = {'NC_000913.3': (0, 4641651)}

# Wrap each input BED file in a BedTool: three interval sets (arcz, dsra,
# rpra) and the utr5 table they are intersected with in the next cells.
arcz, dsra, rpra, utr5 = (
    pybedtools.BedTool(bed_path)
    for bed_path in (
        '../results/arcz.fixed.bed',
        '../results/dsra.fixed.bed',
        '../results/rpra.fixed.bed',
        '../results/wt_tmut.utrs.corr.old.bed',
    )
)
In [4]:
# Intersect the utr5 intervals with the arcz intervals. With wa/wb
# (bedtools `-wa -wb`) each overlap is reported as the utr5 entry followed
# by the arcz entry.
utr5_arcz = utr5.intersect(arcz, wa=True, wb=True)
utr5_arcz.saveas('../results/utr5_arcz.bed')

# Read the saved table back and collect the fifth tab-separated column.
# NOTE(review): presumably that column holds the gene name of the utr5
# record — verify against the layout of the utr5 BED file.
arcz_genes = set()
with open('../results/utr5_arcz.bed') as bed_handle:
    arcz_genes.update(row.strip().split('\t')[4] for row in bed_handle)
len(arcz_genes)
Out[4]:
In [5]:
# Intersect the utr5 intervals with the dsra intervals. With wa/wb
# (bedtools `-wa -wb`) each overlap is reported as the utr5 entry followed
# by the dsra entry.
utr5_dsra = utr5.intersect(dsra, wa=True, wb=True)
utr5_dsra.saveas('../results/utr5_dsra.bed')

# Read the saved table back and collect the fifth tab-separated column.
# NOTE(review): presumably that column holds the gene name of the utr5
# record — verify against the layout of the utr5 BED file.
dsra_genes = set()
with open('../results/utr5_dsra.bed') as bed_handle:
    dsra_genes.update(row.strip().split('\t')[4] for row in bed_handle)
len(dsra_genes)
Out[5]:
In [6]:
# Intersect the utr5 intervals with the rpra intervals. With wa/wb
# (bedtools `-wa -wb`) each overlap is reported as the utr5 entry followed
# by the rpra entry.
utr5_rpra = utr5.intersect(rpra, wa=True, wb=True)
utr5_rpra.saveas('../results/utr5_rpra.bed')

# Read the saved table back and collect the fifth tab-separated column.
# NOTE(review): presumably that column holds the gene name of the utr5
# record — verify against the layout of the utr5 BED file.
rpra_genes = set()
with open('../results/utr5_rpra.bed') as bed_handle:
    rpra_genes.update(row.strip().split('\t')[4] for row in bed_handle)
len(rpra_genes)
Out[6]:
In [7]:
# Load the gene list: one entry per line, surrounding whitespace stripped,
# duplicates collapsed by the set.
genes = set()
with open('../results/s1_s2_intersect.txt') as gene_handle:
    genes.update(map(str.strip, gene_handle))
len(genes)
Out[7]:
In [8]:
# Number of listed genes that also appear in the arcz overlap set.
len(genes.intersection(arcz_genes))
Out[8]:
In [9]:
# Number of listed genes that also appear in the dsra overlap set.
len(genes.intersection(dsra_genes))
Out[9]:
In [10]:
# Number of listed genes that also appear in the rpra overlap set.
len(genes.intersection(rpra_genes))
Out[10]:
In [11]:
# Number of listed genes present in both the arcz and the dsra overlap sets.
len(genes.intersection(arcz_genes, dsra_genes))
Out[11]:
In [12]:
# Number of listed genes present in both the arcz and the rpra overlap sets.
len(genes.intersection(arcz_genes, rpra_genes))
Out[12]:
In [13]:
# Number of listed genes present in both the rpra and the dsra overlap sets.
len(genes.intersection(rpra_genes, dsra_genes))
Out[13]:
In [15]:
# One entry per member of arcz_genes: the name itself when it is also in
# `genes`, otherwise an empty-string placeholder.
['' if g not in genes else g for g in arcz_genes]
Out[15]:
In [22]:
# Build the two columns of the summary table: each gene and the
# comma-separated labels of the overlap sets it belongs to.
#
# The gene order is fixed once (sorted) and reused for both columns. Iterating
# the set separately for each column only lines up by accident of
# implementation, and the iteration order of a set of strings can differ
# between kernel sessions, which made the output row order irreproducible.
gene_order = sorted(genes)

# (label, membership set) pairs; labels are emitted in this order.
srna_sets = (
    ('arcZ', arcz_genes),
    ('dsrA', dsra_genes),
    ('rprA', rpra_genes),
)

# A gene found in none of the sets gets an empty string.
res = [
    ','.join(label for label, members in srna_sets if gene in members)
    for gene in gene_order
]
d = {'genes': gene_order, 'sRNA': res}
In [24]:
# Turn the column dict `d` into a DataFrame and write it out as CSV
# (default to_csv settings, so the row index is written as the first column).
df = pd.DataFrame.from_dict(d)
# The path literal used to open with a doubled quote, which is a SyntaxError.
df.to_csv('../results/s1_s2_intersect.sRNA.csv')
In [ ]: