In [1]:
from Bio import Entrez, Seq, SeqIO
from Bio.Alphabet import IUPAC
In [2]:
# NCBI's E-utilities expect a contact address to be set before any request.
Entrez.email = "put@your_email.here"

# Request accession NM_002299 from the nucleotide database in FASTA format.
hdl = Entrez.efetch(db='nucleotide', id=['NM_002299'], rettype='fasta') # Lactase gene
try:
    # SeqIO.read expects exactly one record in the handle and returns it.
    seq = SeqIO.read(hdl, 'fasta')
finally:
    # Release the network handle even if parsing fails.
    hdl.close()
In [3]:
# Keep positions 11..5794 of the downloaded record (0-based, end-exclusive).
# NOTE(review): presumably this window is the coding region — confirm
# against the record's annotation.
w_seq = seq[slice(11, 5795)]
w_seq
Out[3]:
In [4]:
# Save the trimmed record to example.fasta. The context manager guarantees
# the file is flushed and closed even if SeqIO.write raises.
with open('example.fasta', 'w') as w_hdl:
    SeqIO.write([w_seq], w_hdl, 'fasta')
In [5]:
# Read example.fasta back and print a short summary of every record in it.
# `seq` is rebound on each pass, so once the loop finishes it refers to the
# sequence of the last record that was read.
recs = SeqIO.parse('example.fasta', 'fasta')
for rec in recs:
    print(type(rec))
    seq = rec.seq
    print(rec.description)
    print(seq[:10])
    # NOTE(review): the `.alphabet` attribute belongs to the Bio.Alphabet
    # era of Biopython (releases before 1.78) — confirm the installed version.
    print(seq.alphabet)
In [6]:
# Rebuild the sequence from its plain-text form, this time tagging it with
# the unambiguous IUPAC DNA alphabet.
seq = Seq.Seq(data=str(seq), alphabet=IUPAC.unambiguous_dna)
seq
Out[6]:
In [7]:
# Print the first and last 12 positions together as one tuple.
print((seq[0:12], seq[-12:]))

# Transcribe the DNA sequence to RNA via the module-level helper, which
# delegates to the Seq object's own transcribe() method.
rna = Seq.transcribe(seq)
rna
Out[7]:
In [8]:
# Translate the DNA sequence to protein, spelling out the default settings:
# the standard codon table, without stopping at the first stop codon.
prot = seq.translate(table="Standard", to_stop=False)
prot
Out[8]:
In [8]: