In [1]:
from DNASkittleUtils.Contigs import read_contigs, Contig, write_contigs_to_file
In [2]:
# Raw string: the Windows path contains backslashes (\G, \A, \B) that would
# otherwise be parsed as (invalid) escape sequences. The resulting path value
# is unchanged.
contigs = read_contigs(r"D:\Genomes\Ash BATG-0.5-CLCbioSSPACE\BATG-0.5-CLCbioSSPACE.fa")
Using the two contig names you sent me, it's simplest to do this:
In [5]:
# Build the full contig names by prefixing each numeric ID with 'Contig'.
contig_ids = [1131, 3182, 39106, 110, 5958]
desired_contigs = ['Contig{}'.format(contig_id) for contig_id in contig_ids]
desired_contigs
Out[5]:
If you have a genuinely big file then I would do the following:
In [7]:
# Keep only the contigs whose name appears in desired_contigs,
# preserving their order in the source file.
grab = list(filter(lambda contig: contig.name in desired_contigs, contigs))
len(grab)
Out[7]:
Yes! There are two contigs.
In [8]:
import os
# Show the working directory, since the output path below is relative to it.
print(os.getcwd())
output_fasta = 'data2/sequences_desired.fa'
write_contigs_to_file(output_fasta, grab)
In [9]:
# Preview the names of (at most) the first 100 selected contigs.
[selected.name for selected in grab[:100]]
Out[9]:
In [1]:
import os
# Display the canonical absolute path that the empty relative path resolves to
# (expected to be the current working directory — confirm on your platform).
os.path.realpath('')
Out[1]:
In [ ]: