In [25]:
import numpy as np
from skbio import BiologicalSequence
# Build one long random DNA sequence that the timing cells slice their
# inputs from. A single vectorized draw replaces 250000 separate
# np.random.choice calls (and the per-iteration rebuild of the alphabet
# list); list() over the resulting array still gives a plain Python list
# holding one single-character numpy string per position.
seq = list(np.random.choice(list('ACGT'), size=250000))
# Identifier paired with the sequence in every timed statement.
seq_id = "42"

In [26]:
for i in (500, 5000, 50000):
    subseq = seq[:i]
    print i
    %timeit (subseq, seq_id)


500
10000000 loops, best of 3: 59.4 ns per loop
5000
10000000 loops, best of 3: 59.6 ns per loop
50000
10000000 loops, best of 3: 58.9 ns per loop

In [27]:
for i in (500, 5000, 50000):
    subseq = seq[:i]
    print i
    %timeit (''.join(subseq), seq_id)


500
100000 loops, best of 3: 5.35 µs per loop
5000
10000 loops, best of 3: 51.7 µs per loop
50000
1000 loops, best of 3: 478 µs per loop

In [28]:
for i in (500, 5000, 50000):
    subseq = seq[:i]
    print i
    %timeit BiologicalSequence(subseq, seq_id)


500
100000 loops, best of 3: 6.01 µs per loop
5000
10000 loops, best of 3: 54 µs per loop
50000
1000 loops, best of 3: 475 µs per loop

In [ ]: