In [1]:
from random import Random, sample

from Bio import SeqIO

In [6]:
# Downsample the aligned FASTA file to a fixed-size random subset and write
# the subset back to disk.
INPUT_FASTA = 'data/human_h1_aligned.fasta'
OUTPUT_FASTA = 'data/human_h1_aligned_downsampled.fasta'
N_SAMPLES = 1000  # upper bound on the number of records kept
SEED = 42  # fixed so that re-running the cell selects the same records

# Materialise the parser's records so they can be counted and sampled from.
all_sequences = list(SeqIO.parse(INPUT_FASTA, 'fasta'))

# A private, seeded generator keeps the draw reproducible across kernel
# restarts without altering the module-level random state.
rng = Random(SEED)

# Sampling more items than the population holds raises ValueError, so cap the
# sample size; an input with fewer than N_SAMPLES records is kept whole
# (in shuffled order).
sequences = rng.sample(all_sequences, min(N_SAMPLES, len(all_sequences)))

# Last expression of the cell: its return value is what the notebook displays
# as the cell output (the count of records written, per the Biopython docs).
SeqIO.write(sequences, OUTPUT_FASTA, 'fasta')


Out[6]:
1000

In [ ]: