In [1]:
import Bio.motifs
In [2]:
# Load the MEME output into a record (an iterable collection of motifs).
# The context manager closes the file handle once parsing has finished instead
# of leaving it open for the rest of the kernel session.
with open('all-motifs/meme.txt') as handle:
    record = Bio.motifs.parse(handle, 'meme')
In [20]:
len(record)  # display the total number of motifs in the parsed record
Out[20]:
In [4]:
# keep only the motifs that have more than 300 sites (instances)
select_motifs = [motif for motif in record if len(motif.instances) > 300]
In [19]:
# Select the sequences that contain every selected motif.
#
# `sequences` maps a sequence name to the number of *distinct* selected motifs
# that have at least one site in that sequence. A motif can have several sites
# in the same sequence, so the names are de-duplicated per motif before
# counting; counting raw instances would select a sequence holding two sites of
# one motif and none of another, and drop one holding an extra repeated site.
sequences = {}
for motif in select_motifs:
    names_with_motif = set(inst.sequence_name for inst in motif.instances)
    for name in names_with_motif:
        sequences[name] = sequences.get(name, 0) + 1

# A sequence qualifies when it was counted once for each selected motif.
# `.items()` is used because it exists on both Python 2 and Python 3 dicts.
select_sequences = set(
    name for name, count in sequences.items() if count == len(select_motifs)
)
In [18]:
# Write the sites of each selected motif to its own FASTA-style file named
# '<motif name>.fa' (spaces in the name replaced by '-'), keeping only the
# sites that lie in one of the selected sequences.
# NOTE: the header is just the sequence name, so a sequence with several sites
# of the same motif yields several records with the same header.
for motif in select_motifs:
    # `with` closes each output file even if a write raises part-way through
    with open('%s.fa' % motif.name.replace(' ', '-'), 'w') as outfile:
        for inst in motif.instances:
            if inst.sequence_name in select_sequences:
                outfile.write('>%s\n%s\n' % (inst.sequence_name, str(inst)))