In [50]:
import Bio
import re
In [61]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC
with open("/Users/bergeric/Downloads/dmel-all-chromosome-r6.16.fasta") as x:
first= next(SeqIO.parse(x, "fasta"))
#print(first.seq[6529:9484])
print(first)
In [4]:
#writing files with SeqIO.write()
#SeqIO.write(first, "test_record.fasta", "fasta")
Out[4]:
In [118]:
#matching to genes and slicing:
f = SeqIO.parse("/Users/bergeric/Downloads/dmel-all-chromosome-r6.16.fasta", "fasta")
chrom_dict = SeqIO.to_dict(f)
my_records = []
with open("/Users/bergeric/Projects/s2rnai/output/phastcons_workflow/gene_only_slop.txt") as g:
for line in g:
splitline = line.split('\t')
chrom = splitline[0]
start = int(splitline[3])
stop = int(splitline[4])
lesschrom = chrom[3:]
descrip = splitline[8]
match = re.compile('ID=(\w*);.*').match(descrip)
name = match.group(1)
if lesschrom == 'M':
look_seq = chrom_dict['mitochondrion_genome'].seq
else:
look_seq = chrom_dict[lesschrom].seq
seqslice = look_seq[start - 1:stop]
new_record = SeqRecord(seqslice, id=name, description="")
my_records.append(new_record)
SeqIO.write(my_records, "slopped_genes.fasta", "fasta")
Out[118]:
In [ ]: