In [1]:
    
!pip install biopython
    
    
In [2]:
    
import requests
# Download the sample GenBank file and save it under the last path component of the URL.
url = 'https://raw.githubusercontent.com/Serulab/Py4Bio/master/samples/casein.gb'
input_file_name = url.split('/')[-1]
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as the data file.
response.raise_for_status()
# The context manager guarantees the file is flushed and closed before later cells read it.
with open(input_file_name, 'wb') as gb_file:
    gb_file.write(response.content)
    
    Out[2]:
In [0]:
    
from Bio import SeqIO
# Convert every record of the downloaded GenBank file into casein.fasta.
with open(input_file_name, 'r') as input_handle, \
        open('casein.fasta', 'w') as output_handle:
    # The parser result is handed to SeqIO.write while both files are still open.
    sequences = SeqIO.parse(input_handle, 'genbank')
    SeqIO.write(sequences, output_handle, 'fasta')
    
In [0]:
    
# Display the FASTA file produced by the conversion step.
with open("casein.fasta") as fasta_handle:
    fasta_text = fasta_handle.read()
print(fasta_text)
    
In [0]:
    
# Split casein.fasta (written by the GenBank-to-FASTA cell) into one FASTA file per record.
with open("casein.fasta") as fasta_in:
    for record in SeqIO.parse(fasta_in, 'fasta'):
        # The record's name is used verbatim as the output file name.
        SeqIO.write(record, record.name, "fasta")
    
In [16]:
    
# List the current working directory, where the preceding cell wrote its per-record files.
!ls
    
    
In [17]:
    
# Print the file FJ429671.1 -- presumably one of the per-record FASTA files written
# by the split cell (the name matches the record ID looked up in a later cell).
!cat FJ429671.1
    
    
In [19]:
    
# Build a lookup of the records in casein.fasta, then show one of them.
fasta_records = SeqIO.parse("casein.fasta", "fasta")
record_dict = SeqIO.to_dict(fasta_records)
print(record_dict["FJ429671.1"])  # any key of record_dict works here
    
    
In [26]:
    
# Count the records in casein.fasta (written by the GenBank-to-FASTA cell) and keep
# those whose sequence is strictly longer than MIN_SEQ_LENGTH.
MIN_SEQ_LENGTH = 400
sequences = SeqIO.parse("casein.fasta", 'fasta')
filtered = []
# Pre-set so a file with no records reports 0 instead of raising NameError below.
total = 0
# Number the records from 1 so that, once the loop ends, `total` is the record
# count rather than the zero-based index of the last record.
for total, sequence in enumerate(sequences, start=1):
    if len(sequence.seq) > MIN_SEQ_LENGTH:
        filtered.append(sequence)

print("total sequences: {}".format(total))
print("after filter sequences: {}".format(len(filtered)))