notebook.community

Edit and run



In [1]:

    
import Bio.motifs



In [2]:

    
# load meme output; the with-block closes the file as soon as parsing is done
with open('all-motifs/meme.txt') as handle:
    record = Bio.motifs.parse(handle, 'meme')



In [20]:

    
len(record)  # total number of motifs in the parsed record









    Out[20]:





20



In [4]:

    
# keep only the motifs that occur at strictly more than 300 sites
# (a motif with exactly 300 instances is NOT kept)
select_motifs = [
    candidate for candidate in record
    if len(candidate.instances) > 300
]



In [19]:

    
# select sequences that contain all select motifs
#
# `sequences` maps a sequence name to the number of select motifs that have
# at least one instance in that sequence. Names are de-duplicated per motif
# first, so several sites of the same motif in one sequence count once and
# cannot make up for another motif that is absent from that sequence.
sequences = {}
for motif in select_motifs:
    for name in set(inst.sequence_name for inst in motif.instances):
        sequences[name] = sequences.get(name, 0) + 1

# a sequence qualifies when every select motif was seen in it;
# .items() is used instead of .iteritems() so this runs on Python 2 and 3
select_sequences = set(name for name, count in sequences.items()
                       if count == len(select_motifs))



In [18]:

    
# write each select motif to its own FASTA-style file, keeping only the
# instances that lie in one of the select sequences
for motif in select_motifs:
    # output name is the motif name with spaces replaced by dashes, plus '.fa'
    fasta_path = '%s.fa' % motif.name.replace(' ', '-')
    # the with-block closes the file even if a write raises part-way through
    with open(fasta_path, 'w') as outfile:
        for inst in motif.instances:
            if inst.sequence_name in select_sequences:
                # header line is the source sequence name, body is the site
                outfile.write('>%s\n%s\n' % (inst.sequence_name, str(inst)))