In [30]:
from screed.fasta import fasta_iter
In [18]:
with open("../outputs/rna/match_ortho.rna.moleculo.txt") as rna_mol_match:
rna_mol_set = set([line.split(' ')[0] for line in rna_mol_match if line.strip()])
with open("../outputs/rna/match_ortho.rna.ref.txt") as rna_ref_match:
rna_ref_set = set([line.split(' ')[0] for line in rna_ref_match if line.strip()])
with open("../outputs/rna/ortho.fa") as ortho_file:
E = set([line.split(' ')[0][1:] for line in ortho_file if line.startswith('>')])
In [20]:
B = rna_mol_set & rna_ref_set
In [21]:
C = rna_mol_set - B
In [22]:
A = rna_ref_set - B
In [24]:
D = E - A - C - B
In [53]:
def write_output_fasta(outfile, seqfile, keys):
n_written = 0
with open(outfile, 'w') as output:
with open(seqfile, 'r') as sequences:
for seq in fasta_iter(sequences):
if seq['name'] in keys:
output.write(">{name} {description}\n{sequence}\n".format(**seq))
n_written += 1
return n_written
In [54]:
write_output_fasta("../outputs/rna/cobb/A-ref_rna.fa", "../outputs/rna/ortho.fa", A)
write_output_fasta("../outputs/rna/cobb/B-ref_rna_mol.fa", "../outputs/rna/ortho.fa", B)
write_output_fasta("../outputs/rna/cobb/C-mol_rna.fa", "../outputs/rna/ortho.fa", C)
write_output_fasta("../outputs/rna/cobb/D-only_rna.fa", "../outputs/rna/ortho.fa", D)
Out[54]:
In [ ]: