In [1]:
%load_ext autoreload
%autoreload 2
In [5]:
%matplotlib inline
from glam2_wrapper import Glam2
In [6]:
from utilities import Weblogo
# Logo renderer handed to the motif finder; 'classic' selects its colour scheme.
wl = Weblogo(color_scheme='classic')

# Configure the GLAM2 wrapper: DNA alphabet with the gap symbol included,
# PWM-based scoring and five alignment runs (the log below reports "Run 1".."Run 5").
glam2 = Glam2(
    alphabet='dna',
    gap_in_alphabet=True,
    scoring_criteria='pwm',
    alignment_runs=5,
    weblogo_obj=wl,
)

# Discover motifs in seq18.fa; the results are read from attributes of `glam2`
# in the cells that follow.
glam2.fit(fasta_file="seq18.fa")
INFO:glam2_wrapper:The output directory 'glam2_out' already exists.
Its contents will be overwritten.
Run 1... 24740 iterations
Run 2... 21327 iterations
Run 3... 20880 iterations
Run 4... 19902 iterations
Run 5... 26986 iterations
In [7]:
# Dump glam2.original_motives_list: as the output shows, each motif is a list
# of (sequence id, site string) tuples, printed one per line.
# print(x) with a single argument behaves identically on Python 2 and 3,
# whereas the former `print x` statement is a SyntaxError on Python 3.
for motif in glam2.original_motives_list:
    for record in motif:
        print(record)
    # Blank separator after each motif.
    # NOTE(review): the export lost the original indentation; confirm that the
    # separator belongs to the outer loop.
    print('')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCA-AATT')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGA-AGATCACTT')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCA-AATT')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGA-AGATCACTT')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAA')
('ara', 'GATTATTTGCACGGCGTCACACTT')
('bglr1', 'ATAACTGTGAGCATGGTCATATTT')
('crp', 'ATGTATGCAAAGGACGTCACATTA')
('cya', 'AAGG-TGTTAAATTGATCACGTTT')
('deop2', 'AATTATTTGAACCAGATCGCATTA')
('gale', 'ACTAATTTATTCCATGTCACACTT')
('ilv', 'GTTA-TCTGCAATTCAGTACAAAA')
('lac', 'ATTAATGTGAGTTAGCTCACTCAT')
('male', 'AATTCTGTAACAGAGATCACACAA')
('malk', 'AATTTCGTGATGTTGCTTGCAAAA')
('malt', 'GAAT-TGTGACACAGTGCAAATTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTT')
('tnaa', 'ACGATTGTGATTCGATTCACATTT')
('uxu1', 'ATTGTTGTGATGTGGTTAACCCAA')
('pbr322', 'GCGG-TGTGAAATACCGCACAGAT')
('tdc', 'GTTAATTTGTGAGTGGTCGCACAT')
('ce1cg', 'GTTTTTT-TGATCGTTTTCACAAAAATG')
('ara', 'GATTATT-TGCACGGCGTCACACTTTGC')
('bglr1', 'AATAACTGTGAGCATGGTCATATTTTTA')
('crp', 'GCATGTA-TGCAAAGGACGTCACATTAC')
('cya', 'ACTTGTA-TGTAGCGCATCTTTCTTTAC')
('deop2', 'AATTATT-TGAACCAGATCGCATTACAG')
('gale', 'ACTAATT-TATTCCATGTCACACTTTTC')
('ilv', 'ACAAAACGTGATCAACCCCTCAATTTTC')
('lac', 'AATTAATGTGAGTTAGCTCACTCATTAG')
('male', 'AATTCTG-TAACAGAGATCACACAAAGC')
('malk', 'GAATTTCGTGATGTTGCTTGCAAAAATC')
('malt', 'AAAGATT-TG-GAATTGTGACACAGTGC')
('ompa', 'ATATGCC-TGACGGAGTTCACACTTGTA')
('tnaa', 'AACGATTGTGATTCGATTCACATTTAAA')
('uxu1', 'AATTGTTGTGATGTGGTTAACCCAATTA')
('pbr322', 'ATATGCGGTGTGAAATACCGCACAGATG')
('trn9cat', 'AATAAAT-AAATCCTGGTGTCCCTGTTG')
('tdc', 'GTTAATT-TGTGAGTGGTCGCACATATC')
('ce1cg', 'GTTTTTTTGATCGTTT-TCACAAAAAT')
('ara', 'ATTATTTGCAC-GGCG-TCACACTTTG')
('bglr1', 'AAATTCCTAA--AATT-ACACAAAGTT')
('crp', 'AAAACAGTCAG-GATG-CTACAGTAAT')
('cya', 'AAATTGATCAC-GTTTTAGACCATTTT')
('deop2', 'AAACTTGTAA--GTAGATTTCCTTAAT')
('gale', 'AATTTATTCC---ATG-TCACACTTTT')
('ilv', 'AAAATTTTCC--ATTG-TCTCCCCTGT')
('lac', 'ATTAATGTGA--GTTA-GCTCACTCAT')
('male', 'AATTCTGTAAC-AGAGATCACACAAAG')
('malk', 'AATTTCGTGAT-GTTGCTTGCAAAAAT')
('malt', 'AGATT-TGGA--ATTG-TGACACAGTG')
('ompa', 'ATATGCCTGAC-GGAGTTCACACTTGT')
('tnaa', 'AAATTCTTAC--GTAATTTATAATCTT')
('uxu1', 'ATTGTTGTGAT-GTGGTTAACCCAATT')
('pbr322', 'ATTGTACTGA--GAGT-GCACCATATG')
('trn9cat', 'AAATAAATCC---TGG-TGTCCCTGTT')
('tdc', 'AATTT-GTGA--GTGG-TCGCACATAT')
In [8]:
# Dump glam2.aligned_motives_list: same (sequence id, site string) layout as the
# other motif lists; in the output the sites of a motif are padded with '-'.
# print(x) with a single argument works on both Python 2 and 3; the former
# `print x` statement is a SyntaxError on Python 3.
for motif in glam2.aligned_motives_list:
    for record in motif:
        print(record)
    # Blank separator after each motif.
    # NOTE(review): the export lost the original indentation; confirm that the
    # separator belongs to the outer loop.
    print('')
('ce1cg', 'TTTGATCG-TTTTCACAAAA')
('ara', 'TTTGCACG-GCGTCACACTT')
('bglr1', 'TGTGAGCA-TGGTCATATTT')
('crp', 'TGCAAAGG-ACGTCACATTA')
('cya', 'TGTTAAAT-TGATCACGTTT')
('deop2', 'TTTGAACC-AGATCGCATTA')
('gale', 'TGTAAACG-ATTCCACTAAT')
('ilv', 'CGTGATCA-ACCCCTCAATT')
('lac', 'TGTGAGTT-AGCTCACTCAT')
('male', 'TGTAACAG-AGATCACACAA')
('malk', 'CGTGATGT-TGCTTGCAAAA')
('malt', 'TGTGACAC-AGTGCAAATT-')
('ompa', 'CCTGACGG-AGTTCACACTT')
('tnaa', 'TGTGATTC-GATTCACATTT')
('uxu1', 'TGTGATGT-GGTTAACCCAA')
('pbr322', 'TGTGAAAT-ACCGCACAGAT')
('trn9cat', 'TGTGACGGAAGATCACTT--')
('tdc', 'TGTGAGTG-GTCGCACATAT')
('ce1cg', 'TTTGATCG-TTTTCACAAAA')
('ara', 'TTTGCACG-GCGTCACACTT')
('bglr1', 'TGTGAGCA-TGGTCATATTT')
('crp', 'TGCAAAGG-ACGTCACATTA')
('cya', 'TGTTAAAT-TGATCACGTTT')
('deop2', 'TTTGAACC-AGATCGCATTA')
('gale', 'TGTAAACG-ATTCCACTAAT')
('ilv', 'CGTGATCA-ACCCCTCAATT')
('lac', 'TGTGAGTT-AGCTCACTCAT')
('male', 'TGTAACAG-AGATCACACAA')
('malk', 'CGTGATGT-TGCTTGCAAAA')
('malt', 'TGTGACAC-AGTGCAAATT-')
('ompa', 'CCTGACGG-AGTTCACACTT')
('tnaa', 'TGTGATTC-GATTCACATTT')
('uxu1', 'TGTGATGT-GGTTAACCCAA')
('pbr322', 'TGTGAAAT-ACCGCACAGAT')
('trn9cat', 'TGTGACGGAAGATCACTT--')
('tdc', 'TGTGAGTG-GTCGCACATAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAA')
('ara', 'GATTATTTGCACGGCGTCACACTT')
('bglr1', 'ATAACTGTGAGCATGGTCATATTT')
('crp', 'ATGTATGCAAAGGACGTCACATTA')
('cya', '-AAGGTGTTAAATTGATCACGTTT')
('deop2', 'AATTATTTGAACCAGATCGCATTA')
('gale', 'ACTAATTTATTCCATGTCACACTT')
('ilv', '-GTTATCTGCAATTCAGTACAAAA')
('lac', 'ATTAATGTGAGTTAGCTCACTCAT')
('male', 'AATTCTGTAACAGAGATCACACAA')
('malk', 'AATTTCGTGATGTTGCTTGCAAAA')
('malt', '-GAATTGTGACACAGTGCAAATTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTT')
('tnaa', 'ACGATTGTGATTCGATTCACATTT')
('uxu1', 'ATTGTTGTGATGTGGTTAACCCAA')
('pbr322', '-GCGGTGTGAAATACCGCACAGAT')
('tdc', 'GTTAATTTGTGAGTGGTCGCACAT')
('ce1cg', '----GTT----TTTTTGATCGTTTTCACAAAAATG')
('ara', '----GAT----TATTTGCACGGCGTCACACTTTGC')
('bglr1', '----AATAAC---TGTGAGCATGGTCATATTTTTA')
('crp', '----GCATG--TATGCAAAGGACGTCACATTAC--')
('cya', '----ACTTG--TATGTAGCGCATCTTTCTTTAC--')
('deop2', '----AAT----TATTTGAACCAGATCGCATTACAG')
('gale', '----ACT----AATTTATTCCATGTCACACTTTTC')
('ilv', '----ACAA---AACGTGATCAACCCCTCAATTTTC')
('lac', '----AATT---AATGTGAGTTAGCTCACTCATTAG')
('male', '----AAT----TCTGTAACAGAGATCACACAAAGC')
('malk', '----GAAT---TTCGTGATGTTGCTTGCAAAAATC')
('malt', 'AAAGATTTGGAATTGTGACACAGTGC---------')
('ompa', '----ATA----TGCCTGACGGAGTTCACACTTGTA')
('tnaa', '----AACG---ATTGTGATTCGATTCACATTTAAA')
('uxu1', '----AATT---GTTGTGATGTGGTTAACCCAATTA')
('pbr322', '----ATATGC-GGTGTGAAATACCGCACAGATG--')
('trn9cat', '----AATAAA-TAAATCCTGGTGTCCCTGTTG---')
('tdc', '----GTT----AATTTGTGAGTGGTCGCACATATC')
('ce1cg', '--GTTTTTTTGATCGTTTTCACAAAAAT--')
('ara', '--ATTATTTGCACGGCG-TCACACTTTG--')
('bglr1', 'AAATTCCTAAAA------TTACACAAAGTT')
('crp', 'AAAACAGTCAGGATG---CTACAGTAAT--')
('cya', 'AAATTGATCACGTT----TTAGACCATTTT')
('deop2', 'AAACTTGT---AAGTAGATTTCCTTAAT--')
('gale', '-AATTTATTCCATG----TCACACTTTT--')
('ilv', 'AAAATTTTCCATTG----TCTCCCCTGT--')
('lac', '--ATTAATGTGAGTTAGCTCACTCAT----')
('male', '--AATTCTGTAACAGAGATCACACAAAG--')
('malk', '--AATTTCGTGATGTTGCTTGCAAAAAT--')
('malt', '--AGATTTGGAATTG---TGACACAGTG--')
('ompa', '--ATATGCCTGACGGAGTTCACACTTGT--')
('tnaa', 'AAATTCTT---ACGTAATTTATAATCTT--')
('uxu1', '--ATTGTTGTGATGTGGTTAACCCAATT--')
('pbr322', '------ATTGTACTGAGAGTGCACCATATG')
('trn9cat', 'AAAT-----AAATCCTGGTGTCCCTGTT--')
('tdc', '--AATTTGTGAGTGG---TCGCACATAT--')
In [9]:
# Dump glam2.motives_list: one (sequence id, site string) tuple per line.
# NOTE(review): compared with original_motives_list most '-' gaps appear to be
# replaced by a nucleotide in this list — confirm how the wrapper fills them.
# print(x) with a single argument works on both Python 2 and 3; the former
# `print x` statement is a SyntaxError on Python 3.
for motif in glam2.motives_list:
    for record in motif:
        print(record)
    # Blank separator after each motif.
    # NOTE(review): the export lost the original indentation; confirm that the
    # separator belongs to the outer loop.
    print('')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCACAATT')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGAAAGATCACTT')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCACAATT')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGATAGATCACTT')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAA')
('ara', 'GATTATTTGCACGGCGTCACACTT')
('bglr1', 'ATAACTGTGAGCATGGTCATATTT')
('crp', 'ATGTATGCAAAGGACGTCACATTA')
('cya', 'AAGG-TGTTAAATTGATCACGTTT')
('deop2', 'AATTATTTGAACCAGATCGCATTA')
('gale', 'ACTAATTTATTCCATGTCACACTT')
('ilv', 'GTTATTCTGCAATTCAGTACAAAA')
('lac', 'ATTAATGTGAGTTAGCTCACTCAT')
('male', 'AATTCTGTAACAGAGATCACACAA')
('malk', 'AATTTCGTGATGTTGCTTGCAAAA')
('malt', 'GAATTTGTGACACAGTGCAAATTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTT')
('tnaa', 'ACGATTGTGATTCGATTCACATTT')
('uxu1', 'ATTGTTGTGATGTGGTTAACCCAA')
('pbr322', 'GCGGATGTGAAATACCGCACAGAT')
('tdc', 'GTTAATTTGTGAGTGGTCGCACAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAAATG')
('ara', 'GATTATTTGCACGGCGTCACACTTTGC')
('bglr1', 'AATAACTTGAGCATGGTCATATTTTTA')
('crp', 'GCATGTATGCAAAGGACGTCACATTAC')
('cya', 'ACTTGTATGTAGCGCATCTTTCTTTAC')
('deop2', 'AATTATTTGAACCAGATCGCATTACAG')
('gale', 'ACTAATTTATTCCATGTCACACTTTTC')
('ilv', 'ACAAAACTGATCAACCCCTCAATTTTC')
('lac', 'AATTAATTGAGTTAGCTCACTCATTAG')
('male', 'AATTCTGTAACAGAGATCACACAAAGC')
('malk', 'GAATTTCTGATGTTGCTTGCAAAAATC')
('malt', 'AAAGATTTGCGAATTGTGACACAGTGC')
('ompa', 'ATATGCCTGACGGAGTTCACACTTGTA')
('tnaa', 'AACGATTTGATTCGATTCACATTTAAA')
('uxu1', 'AATTGTTTGATGTGGTTAACCCAATTA')
('pbr322', 'ATATGCGTGTGAAATACCGCACAGATG')
('trn9cat', 'AATAAATAAATCCTGGTGTCCCTGTTG')
('tdc', 'GTTAATTTGTGAGTGGTCGCACATATC')
('ce1cg', 'GTTTTTTTGAGTTTTCACAAAAAT')
('ara', 'ATTATTTGCAGGCGTCACACTTTG')
('bglr1', 'AAATTCCTAAAATTACACAAAGTT')
('crp', 'AAAACAGTCAGATGCTACAGTAAT')
('cya', 'AAATTGATCAGTTTAGACCATTTT')
('deop2', 'AAACTTGTAAGTAGTTTCCTTAAT')
('gale', 'AATTTATTCCAATGTCACACTTTT')
('ilv', 'AAAATTTTCCATTGTCTCCCCTGT')
('lac', 'ATTAATGTGAGTTAGCTCACTCAT')
('male', 'AATTCTGTAAAGAGTCACACAAAG')
('malk', 'AATTTCGTGAGTTGTTGCAAAAAT')
('malt', 'AGATTCTGGAATTGTGACACAGTG')
('ompa', 'ATATGCCTGAGGAGTCACACTTGT')
('tnaa', 'AAATTCTTACGTAATTATAATCTT')
('uxu1', 'ATTGTTGTGAGTGGTAACCCAATT')
('pbr322', 'ATTGTACTGAGAGTGCACCATATG')
('trn9cat', 'AAATAAATCC-TGGTGTCCCTGTT')
('tdc', 'AATTTCGTGAGTGGTCGCACATAT')
In [10]:
# Run the fitted model over seq18.fa. With return_list=True each printed entry
# is a list of integers per input sequence (presumably the indices of the motifs
# found in it — confirm against glam2_wrapper).
predictions = glam2.predict(input_seqs='seq18.fa', return_list=True)
# print(x) with one argument is valid on Python 2 and 3 alike.
for motif_hits in predictions:
    print(motif_hits)
[0, 1, 2]
[0, 1, 2]
[0, 1]
[0, 1, 2]
[0, 1]
[0, 1, 2]
[0, 1]
[0, 1]
[0, 1, 2]
[0, 1, 2]
[0, 1]
[]
[0, 1]
[0, 1]
[0, 1]
[0, 1]
[]
[0, 1, 2]
In [11]:
# Same model applied to seq9.fa. With return_list=False each entry is a single
# integer per sequence (presumably the number of motifs found — confirm).
predictions = glam2.predict(input_seqs="seq9.fa", return_list=False)
# print(x) with one argument is valid on Python 2 and 3 alike.
for motif_count in predictions:
    print(motif_count)
3
3
2
3
2
3
2
2
3
In [12]:
# With return_match=True every sequence yields one list per motif (five here),
# each holding 3-tuples; judging by the output these look like
# (start, end, score) — confirm the exact meaning in glam2_wrapper.
match = glam2.transform(input_seqs='seq9.fa', return_match=True)
# print(x) with one argument is valid on Python 2 and 3 alike.
for seq_matches in match:
    print(seq_matches)
[[(64, 83, 6.018168375655859e-07)], [(64, 83, 6.018168375655859e-07)], [(59, 83, 7.995400927298347e-09)], [], []]
[[(58, 77, 8.49623770680827e-08)], [(58, 77, 8.49623770680827e-08)], [(53, 77, 5.6044685346158614e-09)], [], []]
[[(79, 98, 1.184826898957247e-07)], [(79, 98, 1.184826898957247e-07)], [], [], []]
[[(66, 85, 8.850247611258612e-08)], [(66, 85, 8.850247611258612e-08)], [(61, 85, 1.4613132771739134e-09)], [], []]
[[(53, 72, 4.298691696897042e-08)], [(53, 72, 4.298691696897042e-08)], [], [], []]
[[(10, 29, 2.901616895405503e-07)], [(10, 29, 2.901616895405503e-07)], [(5, 29, 3.392334393439441e-08)], [], []]
[[(27, 46, 8.597383393794084e-08)], [(27, 46, 8.597383393794084e-08)], [], [], []]
[[(42, 61, 2.321824219003032e-08)], [(42, 61, 2.321824219003032e-08)], [], [], []]
[[(12, 31, 1.1284065704354736e-06)], [(12, 31, 1.1284065704354736e-06)], [(7, 31, 6.784668786878883e-09)], [], []]
In [13]:
# With return_match=False every sequence yields one 0/1 value per motif
# (presumably 1 where the motif was found — confirm).
match = glam2.transform(input_seqs='seq9.fa', return_match=False)
# print(x) with one argument is valid on Python 2 and 3 alike.
for seq_flags in match:
    print(seq_flags)
[1, 1, 1, 0, 0]
[1, 1, 1, 0, 0]
[1, 1, 0, 0, 0]
[1, 1, 1, 0, 0]
[1, 1, 0, 0, 0]
[1, 1, 1, 0, 0]
[1, 1, 0, 0, 0]
[1, 1, 0, 0, 0]
[1, 1, 1, 0, 0]
In [14]:
# Second, independent model: six alignment runs and no Weblogo object.
glam_2 = Glam2(
    alphabet='dna',
    gap_in_alphabet=True,
    scoring_criteria='pwm',
    alignment_runs=6,
)
# Fit on seq9.fa and predict on the same file in a single call.
predictions = glam_2.fit_predict(fasta_file='seq9.fa', return_list=True)
# print(x) with one argument is valid on Python 2 and 3 alike.
for motif_hits in predictions:
    print(motif_hits)
[5]
[0, 1, 2, 5]
[0, 1, 2, 5]
[0, 1, 2, 5]
[5]
[0, 1, 2, 5, 5]
[0, 1, 2, 3, 4, 5]
[5]
[0, 1, 2, 5]
In [15]:
# NOTE(review): judging by its name, fit_transform fits glam_2 again on seq9.fa
# before transforming, so these matches may come from a different fit than the
# predictions printed in the previous cell — confirm.
matches = glam_2.fit_transform(fasta_file='seq9.fa', return_match=True)
# One entry per sequence; each holds one list of 3-tuples per motif (six here).
# print(x) with one argument is valid on Python 2 and 3 alike.
for seq_matches in matches:
    print(seq_matches)
[[], [], [], [], [], [(57, 71, 0.0002477927800044669)]]
[[(53, 78, 1.3253233773097963e-07)], [(53, 78, 1.3253233773097963e-07)], [(53, 78, 1.3253233773097963e-07)], [], [], [(86, 100, 7.048327964571501e-05)]]
[[(74, 99, 7.573276441770265e-09)], [(74, 99, 7.573276441770265e-09)], [(74, 99, 7.573276441770265e-09)], [], [], [(89, 103, 0.0003700372181400038)]]
[[(61, 86, 1.9722074067110066e-09)], [(61, 86, 1.9722074067110066e-09)], [(61, 86, 1.9722074067110066e-09)], [], [], [(41, 55, 0.00015418217422500155)]]
[[], [], [], [], [], [(52, 66, 0.000289091576671878)]]
[[(5, 30, 4.6593399983547544e-08)], [(5, 30, 4.6593399983547544e-08)], [(5, 30, 4.6593399983547544e-08)], [], [], [(23, 37, 0.00010278811615000105), (56, 70, 3.6710041482143234e-06)]]
[[(40, 65, 1.1359914662655397e-07)], [(40, 65, 1.1359914662655397e-07)], [(40, 65, 1.1359914662655397e-07)], [(35, 65, 1.9699829996374405e-09)], [(35, 65, 1.9699829996374405e-09)], [(67, 81, 8.223049292000083e-05)]]
[[], [], [], [], [], [(11, 25, 6.167286969000063e-05)]]
[[(7, 32, 5.409483172693046e-09)], [(7, 32, 5.409483172693046e-09)], [(7, 32, 5.409483172693046e-09)], [], [], [(77, 91, 2.312732613375024e-05)]]
In [16]:
# Print every motif as a list of (sequence id, site string) tuples.
# NOTE(review): this repeats the earlier dump of glam2.motives_list, yet the
# recorded output differs, which suggests the model was re-fitted in between.
# print(x) with a single argument works on both Python 2 and 3; the former
# `print x` statement is a SyntaxError on Python 3.
for motif in glam2.motives_list:
    for record in motif:
        print(record)
    # Blank separator after each motif.
    # NOTE(review): the export lost the original indentation; confirm that the
    # separator belongs to the outer loop.
    print('')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCAAATTT')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGAGATCACTTTA')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCAAATT-')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGAGATCACTTTT')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAA')
('ara', 'GATTATTTGCACGGCGTCACACTT')
('bglr1', 'ATAACTGTGAGCATGGTCATATTT')
('crp', 'ATGTATGCAAAGGACGTCACATTA')
('cya', 'AAAGGTGTTAAATTGATCACGTTT')
('deop2', 'AATTATTTGAACCAGATCGCATTA')
('gale', 'ACTAATTTATTCCATGTCACACTT')
('ilv', 'GGTTATCTGCAATTCAGTACAAAA')
('lac', 'ATTAATGTGAGTTAGCTCACTCAT')
('male', 'AATTCTGTAACAGAGATCACACAA')
('malk', 'AATTTCGTGATGTTGCTTGCAAAA')
('malt', 'GGAATTGTGACACAGTGCAAATTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTT')
('tnaa', 'ACGATTGTGATTCGATTCACATTT')
('uxu1', 'ATTGTTGTGATGTGGTTAACCCAA')
('pbr322', 'AGCGGTGTGAAATACCGCACAGAT')
('tdc', 'GTTAATTTGTGAGTGGTCGCACAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAAATG')
('ara', 'GATTATTTGCACGGCGTCACACTTTGC')
('bglr1', 'AATTCTGTGAGCATGGTCATATTTTTA')
('crp', 'GCATATGCAAAGGACGTCACATTAC-C')
('cya', 'ACTTATGTAGCGCATCTTTCTTTACTA')
('deop2', 'AATTATTTGAACCAGATCGCATTACAG')
('gale', 'ACTAATTTATTCCATGTCACACTTTTC')
('ilv', 'ACAAACGTGATCAACCCCTCAATTTTC')
('lac', 'AATAATGTGAGTTAGCTCACTCATTAG')
('male', 'AATTCTGTAACAGAGATCACACAAAGC')
('malk', 'GAATTCGTGATGTTGCTTGCAAAAATC')
('malt', 'ATTATTGTGACACAGTGCT-ATTTTTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTTGTA')
('tnaa', 'AACATTGTGATTCGATTCACATTTAAA')
('uxu1', 'AATGTTGTGATGTGGTTAACCCAATTA')
('pbr322', 'ATAGGTGTGAAATACCGCACAGATGGG')
('trn9cat', 'AATTAAATCCTGGTGTCCCTGTTGTAG')
('tdc', 'GTTAATTTGTGAGTGGTCGCACATATC')
('ce1cg', 'GTTTTTTTGATCGTTCACAAAAAT')
('ara', 'ATTATTTGCACGGGTCACACTTTG')
('bglr1', 'ATTCCTAAAACGTGTTACACAAAG')
('crp', 'AACAGTCAGGATG-CTACAGTAAT')
('cya', 'ATTGATCACGTTGTTTAGACCATT')
('deop2', 'ACTTGTTGAAAGTGTTTCCTTAAT')
('gale', 'ATTTATTCCATGT-TCACACTTTT')
('ilv', 'AATTTTCCATTGGGTCTCCCCTGT')
('lac', 'ATTAATGTGAGTTGTCACTCATGT')
('male', 'AATTCTGTAACAGGTCACACAAAG')
('malk', 'AATTTCGTGATGTGTTGCAAAAAT')
('malt', 'AGATTTGGAATTGGTGACACAGTG')
('ompa', 'ATATGCCTGACGGGTCACACTTGT')
('tnaa', 'ATTCTTGGGACGTATTATAATCTT')
('uxu1', 'ATTGTTGTGATGTGTAACCCAATT')
('pbr322', 'AAACATTGTACTGGGTGCACCATA')
('trn9cat', 'ATTTATCAAATCCGTGTCCCTGTT')
('tdc', 'AATTTGTGAGTGGGTCGCACATAT')
In [17]:
# Draw the motif logos with do_alignment=False (presumably skipping the extra
# alignment step — confirm); the rendered images are not part of this export.
glam2.display_logo(do_alignment=False)
In [18]:
# Draw the logo of one motif only, selected with motif_num=1.
# NOTE(review): motif_num looks 1-based (display(motif_num=3) prints the third table) — confirm.
glam2.display_logo(motif_num=1)
In [19]:
# Align the sites of each motif (multiple sequence alignment with Muscle) and
# keep a handle on the result.
glam2.align_motives()
motives1 = glam2.aligned_motives_list

# print(x) with a single argument works on both Python 2 and 3; the former
# `print x` statement is a SyntaxError on Python 3.
for motif in motives1:
    for record in motif:
        print(record)
    # Blank separator after each motif.
    # NOTE(review): the export lost the original indentation; confirm that the
    # separator belongs to the outer loop.
    print('')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCAAATTT')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGAGATCACTTTA')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'TTTGATCGTTTTCACAAAA')
('ara', 'TTTGCACGGCGTCACACTT')
('bglr1', 'TGTGAGCATGGTCATATTT')
('crp', 'TGCAAAGGACGTCACATTA')
('cya', 'TGTTAAATTGATCACGTTT')
('deop2', 'TTTGAACCAGATCGCATTA')
('gale', 'TGTAAACGATTCCACTAAT')
('ilv', 'CGTGATCAACCCCTCAATT')
('lac', 'TGTGAGTTAGCTCACTCAT')
('male', 'TGTAACAGAGATCACACAA')
('malk', 'CGTGATGTTGCTTGCAAAA')
('malt', 'TGTGACACAGTGCAAATT-')
('ompa', 'CCTGACGGAGTTCACACTT')
('tnaa', 'TGTGATTCGATTCACATTT')
('uxu1', 'TGTGATGTGGTTAACCCAA')
('pbr322', 'TGTGAAATACCGCACAGAT')
('trn9cat', 'TGTGACGGAGATCACTTTT')
('tdc', 'TGTGAGTGGTCGCACATAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAA')
('ara', 'GATTATTTGCACGGCGTCACACTT')
('bglr1', 'ATAACTGTGAGCATGGTCATATTT')
('crp', 'ATGTATGCAAAGGACGTCACATTA')
('cya', 'AAAGGTGTTAAATTGATCACGTTT')
('deop2', 'AATTATTTGAACCAGATCGCATTA')
('gale', 'ACTAATTTATTCCATGTCACACTT')
('ilv', 'GGTTATCTGCAATTCAGTACAAAA')
('lac', 'ATTAATGTGAGTTAGCTCACTCAT')
('male', 'AATTCTGTAACAGAGATCACACAA')
('malk', 'AATTTCGTGATGTTGCTTGCAAAA')
('malt', 'GGAATTGTGACACAGTGCAAATTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTT')
('tnaa', 'ACGATTGTGATTCGATTCACATTT')
('uxu1', 'ATTGTTGTGATGTGGTTAACCCAA')
('pbr322', 'AGCGGTGTGAAATACCGCACAGAT')
('tdc', 'GTTAATTTGTGAGTGGTCGCACAT')
('ce1cg', 'GTTTTTTTGATCGTTTTCACAAAAATG')
('ara', 'GATTATTTGCACGGCGTCACACTTTGC')
('bglr1', 'AATTCTGTGAGCATGGTCATATTTTTA')
('crp', 'GCATATGCAAAGGACGTCACATTACC-')
('cya', 'ACTTATGTAGCGCATCTTTCTTTACTA')
('deop2', 'AATTATTTGAACCAGATCGCATTACAG')
('gale', 'ACTAATTTATTCCATGTCACACTTTTC')
('ilv', 'ACAAACGTGATCAACCCCTCAATTTTC')
('lac', 'AATAATGTGAGTTAGCTCACTCATTAG')
('male', 'AATTCTGTAACAGAGATCACACAAAGC')
('malk', 'GAATTCGTGATGTTGCTTGCAAAAATC')
('malt', 'ATTATTGTGACACAGTGC-TATTTTTC')
('ompa', 'ATATGCCTGACGGAGTTCACACTTGTA')
('tnaa', 'AACATTGTGATTCGATTCACATTTAAA')
('uxu1', 'AATGTTGTGATGTGGTTAACCCAATTA')
('pbr322', 'ATAGGTGTGAAATACCGCACAGATGGG')
('trn9cat', 'AATTAAATCCTGGTGTCCCTGTTGTAG')
('tdc', 'GTTAATTTGTGAGTGGTCGCACATATC')
('ce1cg', 'GTTTTTTTGATCGTTCACAAAAAT')
('ara', 'ATTATTTGCACGGGTCACACTTTG')
('bglr1', 'ATTCCTAAAACGTGTTACACAAAG')
('crp', 'AACAGTCAGG-ATGCTACAGTAAT')
('cya', 'ATTGATCACGTTGTTTAGACCATT')
('deop2', 'ACTTGTTGAAAGTGTTTCCTTAAT')
('gale', 'ATTTATTCCATGT-TCACACTTTT')
('ilv', 'AATTTTCCATTGGGTCTCCCCTGT')
('lac', 'ATTAATGTGAGTTGTCACTCATGT')
('male', 'AATTCTGTAACAGGTCACACAAAG')
('malk', 'AATTTCGTGATGTGTTGCAAAAAT')
('malt', 'AGATTTGGAATTGGTGACACAGTG')
('ompa', 'ATATGCCTGACGGGTCACACTTGT')
('tnaa', 'ATTCTTGGGACGTATTATAATCTT')
('uxu1', 'ATTGTTGTGATGTGTAACCCAATT')
('pbr322', 'AAACATTGTACTGGGTGCACCATA')
('trn9cat', 'ATTTATCAAATCCGTGTCCCTGTT')
('tdc', 'AATTTGTGAGTGGGTCGCACATAT')
In [20]:
# Draw the motif logos again, this time with do_alignment=True (presumably from
# the aligned sites — confirm); the rendered images are not part of this export.
glam2.display_logo(do_alignment=True)
In [21]:
# Print one table per motif: columns are motif positions, rows are the symbols
# '-', A, C, G, T, and the cells hold values rounded to two decimals
# (each column appears to sum to about 1, i.e. per-position frequencies).
glam2.display()
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
-: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00
A: 0.00 0.00 0.00 0.17 0.94 0.33 0.22 0.11 0.56 0.06 0.22 0.00 0.11 0.78 0.00 0.78 0.28 0.44 0.33
C: 0.17 0.06 0.06 0.00 0.06 0.22 0.33 0.17 0.00 0.22 0.28 0.11 0.83 0.00 0.89 0.06 0.33 0.00 0.00
G: 0.00 0.78 0.00 0.78 0.00 0.17 0.28 0.44 0.22 0.50 0.17 0.22 0.00 0.11 0.00 0.06 0.06 0.00 0.00
T: 0.83 0.17 0.94 0.06 0.00 0.28 0.17 0.28 0.22 0.17 0.33 0.67 0.06 0.11 0.06 0.11 0.33 0.56 0.67
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
-: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00
A: 0.00 0.00 0.00 0.17 0.94 0.33 0.22 0.11 0.56 0.06 0.22 0.00 0.11 0.78 0.00 0.78 0.28 0.44 0.33
C: 0.17 0.06 0.06 0.00 0.06 0.22 0.33 0.17 0.00 0.22 0.28 0.11 0.83 0.00 0.89 0.06 0.33 0.00 0.00
G: 0.00 0.78 0.00 0.78 0.00 0.17 0.28 0.44 0.22 0.50 0.17 0.22 0.00 0.11 0.00 0.06 0.06 0.00 0.00
T: 0.83 0.17 0.94 0.06 0.00 0.28 0.17 0.28 0.22 0.17 0.33 0.67 0.06 0.11 0.06 0.11 0.33 0.56 0.67
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
-: 0.00 0.00 0.00 0.00 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
A: 0.65 0.35 0.18 0.35 0.35 0.00 0.00 0.00 0.18 0.76 0.35 0.35 0.06 0.47 0.06 0.24 0.00 0.06 0.82 0.06 0.82 0.18 0.47 0.41
C: 0.00 0.18 0.00 0.00 0.12 0.12 0.12 0.06 0.00 0.12 0.18 0.29 0.24 0.00 0.24 0.18 0.00 0.82 0.00 0.88 0.06 0.41 0.00 0.06
G: 0.35 0.00 0.24 0.18 0.06 0.00 0.59 0.00 0.76 0.00 0.18 0.24 0.35 0.18 0.59 0.29 0.18 0.00 0.18 0.00 0.06 0.06 0.00 0.00
T: 0.00 0.47 0.59 0.47 0.24 0.88 0.29 0.94 0.06 0.12 0.29 0.12 0.35 0.35 0.12 0.29 0.82 0.12 0.00 0.06 0.06 0.35 0.53 0.53
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
-: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.61 0.00 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
A: 0.72 0.56 0.33 0.28 0.56 0.17 0.11 0.00 0.06 0.17 0.61 0.22 0.28 0.28 0.39 0.06 0.28 0.00 0.06 0.56 0.00 0.78 0.17 0.50 0.28 0.33 0.28 0.22
C: 0.00 0.22 0.06 0.00 0.06 0.17 0.17 0.00 0.00 0.00 0.11 0.11 0.39 0.28 0.00 0.17 0.17 0.17 0.72 0.00 0.89 0.11 0.67 0.00 0.00 0.06 0.00 0.50
G: 0.28 0.00 0.00 0.11 0.28 0.00 0.11 0.39 0.00 0.83 0.00 0.28 0.22 0.28 0.28 0.56 0.33 0.00 0.17 0.22 0.00 0.00 0.00 0.00 0.17 0.06 0.17 0.28
T: 0.00 0.22 0.61 0.61 0.11 0.67 0.61 0.00 0.94 0.00 0.22 0.39 0.11 0.17 0.33 0.22 0.22 0.83 0.06 0.22 0.11 0.11 0.17 0.50 0.56 0.56 0.56 0.00
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
-: 0.00 0.00 0.00 0.00 0.00 0.11 0.00 0.00 0.00 0.00 0.56 0.94 0.11 0.00 0.00 0.00 0.61 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
A: 0.94 0.61 0.50 0.22 0.11 0.22 0.11 0.00 0.22 0.78 0.00 0.00 0.22 0.22 0.22 0.11 0.11 0.11 0.06 0.67 0.00 0.67 0.33 0.39 0.39 0.39 0.00
C: 0.00 0.00 0.00 0.06 0.11 0.22 0.17 0.00 0.33 0.22 0.22 0.06 0.00 0.00 0.06 0.00 0.06 0.06 0.56 0.00 0.94 0.33 0.56 0.06 0.11 0.00 0.00
G: 0.06 0.06 0.00 0.11 0.06 0.06 0.39 0.11 0.44 0.00 0.06 0.00 0.67 0.17 0.22 0.67 0.00 0.11 0.17 0.11 0.00 0.00 0.06 0.00 0.17 0.11 0.22
T: 0.00 0.33 0.50 0.61 0.72 0.39 0.33 0.89 0.00 0.00 0.17 0.00 0.00 0.61 0.50 0.22 0.22 0.72 0.22 0.22 0.06 0.00 0.06 0.56 0.33 0.50 0.78
In [22]:
# Return the same numbers unrounded: a list with one array per motif, five rows
# each (the row order matches the '-', A, C, G, T tables printed by display()).
glam2.matrix()
Out[22]:
[array([[ 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0.05555556,
0. , 0. , 0. , 0. , 0.05555556,
0. , 0. , 0. , 0. ],
[ 0. , 0. , 0. , 0.16666667, 0.94444444,
0.33333333, 0.22222222, 0.11111111, 0.55555556, 0.05555556,
0.22222222, 0. , 0.11111111, 0.77777778, 0. ,
0.77777778, 0.27777778, 0.44444444, 0.33333333],
[ 0.16666667, 0.05555556, 0.05555556, 0. , 0.05555556,
0.22222222, 0.33333333, 0.16666667, 0. , 0.22222222,
0.27777778, 0.11111111, 0.83333333, 0. , 0.88888889,
0.05555556, 0.33333333, 0. , 0. ],
[ 0. , 0.77777778, 0. , 0.77777778, 0. ,
0.16666667, 0.27777778, 0.44444444, 0.22222222, 0.5 ,
0.16666667, 0.22222222, 0. , 0.11111111, 0. ,
0.05555556, 0.05555556, 0. , 0. ],
[ 0.83333333, 0.16666667, 0.94444444, 0.05555556, 0. ,
0.27777778, 0.16666667, 0.27777778, 0.22222222, 0.16666667,
0.33333333, 0.66666667, 0.05555556, 0.11111111, 0.05555556,
0.11111111, 0.33333333, 0.55555556, 0.66666667]]),
array([[ 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0.05555556,
0. , 0. , 0. , 0. , 0.05555556,
0. , 0. , 0. , 0. ],
[ 0. , 0. , 0. , 0.16666667, 0.94444444,
0.33333333, 0.22222222, 0.11111111, 0.55555556, 0.05555556,
0.22222222, 0. , 0.11111111, 0.77777778, 0. ,
0.77777778, 0.27777778, 0.44444444, 0.33333333],
[ 0.16666667, 0.05555556, 0.05555556, 0. , 0.05555556,
0.22222222, 0.33333333, 0.16666667, 0. , 0.22222222,
0.27777778, 0.11111111, 0.83333333, 0. , 0.88888889,
0.05555556, 0.33333333, 0. , 0. ],
[ 0. , 0.77777778, 0. , 0.77777778, 0. ,
0.16666667, 0.27777778, 0.44444444, 0.22222222, 0.5 ,
0.16666667, 0.22222222, 0. , 0.11111111, 0. ,
0.05555556, 0.05555556, 0. , 0. ],
[ 0.83333333, 0.16666667, 0.94444444, 0.05555556, 0. ,
0.27777778, 0.16666667, 0.27777778, 0.22222222, 0.16666667,
0.33333333, 0.66666667, 0.05555556, 0.11111111, 0.05555556,
0.11111111, 0.33333333, 0.55555556, 0.66666667]]),
array([[ 0. , 0. , 0. , 0. , 0.23529412,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. ],
[ 0.64705882, 0.35294118, 0.17647059, 0.35294118, 0.35294118,
0. , 0. , 0. , 0.17647059, 0.76470588,
0.35294118, 0.35294118, 0.05882353, 0.47058824, 0.05882353,
0.23529412, 0. , 0.05882353, 0.82352941, 0.05882353,
0.82352941, 0.17647059, 0.47058824, 0.41176471],
[ 0. , 0.17647059, 0. , 0. , 0.11764706,
0.11764706, 0.11764706, 0.05882353, 0. , 0.11764706,
0.17647059, 0.29411765, 0.23529412, 0. , 0.23529412,
0.17647059, 0. , 0.82352941, 0. , 0.88235294,
0.05882353, 0.41176471, 0. , 0.05882353],
[ 0.35294118, 0. , 0.23529412, 0.17647059, 0.05882353,
0. , 0.58823529, 0. , 0.76470588, 0. ,
0.17647059, 0.23529412, 0.35294118, 0.17647059, 0.58823529,
0.29411765, 0.17647059, 0. , 0.17647059, 0. ,
0.05882353, 0.05882353, 0. , 0. ],
[ 0. , 0.47058824, 0.58823529, 0.47058824, 0.23529412,
0.88235294, 0.29411765, 0.94117647, 0.05882353, 0.11764706,
0.29411765, 0.11764706, 0.35294118, 0.35294118, 0.11764706,
0.29411765, 0.82352941, 0.11764706, 0. , 0.05882353,
0.05882353, 0.35294118, 0.52941176, 0.52941176]]),
array([[ 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.61111111, 0. , 0. ,
0.05555556, 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. ],
[ 0.72222222, 0.55555556, 0.33333333, 0.27777778, 0.55555556,
0.16666667, 0.11111111, 0. , 0.05555556, 0.16666667,
0.61111111, 0.22222222, 0.27777778, 0.27777778, 0.38888889,
0.05555556, 0.27777778, 0. , 0.05555556, 0.55555556,
0. , 0.77777778, 0.16666667, 0.5 , 0.27777778,
0.33333333, 0.27777778, 0.22222222],
[ 0. , 0.22222222, 0.05555556, 0. , 0.05555556,
0.16666667, 0.16666667, 0. , 0. , 0. ,
0.11111111, 0.11111111, 0.38888889, 0.27777778, 0. ,
0.16666667, 0.16666667, 0.16666667, 0.72222222, 0. ,
0.88888889, 0.11111111, 0.66666667, 0. , 0. ,
0.05555556, 0. , 0.5 ],
[ 0.27777778, 0. , 0. , 0.11111111, 0.27777778,
0. , 0.11111111, 0.38888889, 0. , 0.83333333,
0. , 0.27777778, 0.22222222, 0.27777778, 0.27777778,
0.55555556, 0.33333333, 0. , 0.16666667, 0.22222222,
0. , 0. , 0. , 0. , 0.16666667,
0.05555556, 0.16666667, 0.27777778],
[ 0. , 0.22222222, 0.61111111, 0.61111111, 0.11111111,
0.66666667, 0.61111111, 0. , 0.94444444, 0. ,
0.22222222, 0.38888889, 0.11111111, 0.16666667, 0.33333333,
0.22222222, 0.22222222, 0.83333333, 0.05555556, 0.22222222,
0.11111111, 0.11111111, 0.16666667, 0.5 , 0.55555556,
0.55555556, 0.55555556, 0. ]]),
array([[ 0. , 0. , 0. , 0. , 0. ,
0.11111111, 0. , 0. , 0. , 0. ,
0.55555556, 0.94444444, 0.11111111, 0. , 0. ,
0. , 0.61111111, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. ],
[ 0.94444444, 0.61111111, 0.5 , 0.22222222, 0.11111111,
0.22222222, 0.11111111, 0. , 0.22222222, 0.77777778,
0. , 0. , 0.22222222, 0.22222222, 0.22222222,
0.11111111, 0.11111111, 0.11111111, 0.05555556, 0.66666667,
0. , 0.66666667, 0.33333333, 0.38888889, 0.38888889,
0.38888889, 0. ],
[ 0. , 0. , 0. , 0.05555556, 0.11111111,
0.22222222, 0.16666667, 0. , 0.33333333, 0.22222222,
0.22222222, 0.05555556, 0. , 0. , 0.05555556,
0. , 0.05555556, 0.05555556, 0.55555556, 0. ,
0.94444444, 0.33333333, 0.55555556, 0.05555556, 0.11111111,
0. , 0. ],
[ 0.05555556, 0.05555556, 0. , 0.11111111, 0.05555556,
0.05555556, 0.38888889, 0.11111111, 0.44444444, 0. ,
0.05555556, 0. , 0.66666667, 0.16666667, 0.22222222,
0.66666667, 0. , 0.11111111, 0.16666667, 0.11111111,
0. , 0. , 0.05555556, 0. , 0.16666667,
0.11111111, 0.22222222],
[ 0. , 0.33333333, 0.5 , 0.61111111, 0.72222222,
0.38888889, 0.33333333, 0.88888889, 0. , 0. ,
0.16666667, 0. , 0. , 0.61111111, 0.5 ,
0.22222222, 0.22222222, 0.72222222, 0.22222222, 0.22222222,
0.05555556, 0. , 0.05555556, 0.55555556, 0.33333333,
0.5 , 0.77777778]])]
In [23]:
# Print the table of a single motif; motif_num=3 yields the third table of the
# full display() output, so the numbering appears to start at 1.
glam2.display(motif_num=3)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
-: 0.00 0.00 0.00 0.00 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
A: 0.65 0.35 0.18 0.35 0.35 0.00 0.00 0.00 0.18 0.76 0.35 0.35 0.06 0.47 0.06 0.24 0.00 0.06 0.82 0.06 0.82 0.18 0.47 0.41
C: 0.00 0.18 0.00 0.00 0.12 0.12 0.12 0.06 0.00 0.12 0.18 0.29 0.24 0.00 0.24 0.18 0.00 0.82 0.00 0.88 0.06 0.41 0.00 0.06
G: 0.35 0.00 0.24 0.18 0.06 0.00 0.59 0.00 0.76 0.00 0.18 0.24 0.35 0.18 0.59 0.29 0.18 0.00 0.18 0.00 0.06 0.06 0.00 0.00
T: 0.00 0.47 0.59 0.47 0.24 0.88 0.29 0.94 0.06 0.12 0.29 0.12 0.35 0.35 0.12 0.29 0.82 0.12 0.00 0.06 0.06 0.35 0.53 0.53
In [24]:
# Synthetic input: a 14-nt fragment repeated ten times (140 nt in total).
test_seq = 'GGAGAAAATACCGC' * 10
# Score the sequence against one motif; the result printed below is a list of
# 140 floats, i.e. one value per sequence position.
# NOTE(review): motif_num appears to be 1-based elsewhere in this notebook — confirm for score().
seq_score = glam2.score(motif_num=2, seq=test_seq)
# print(x) with one argument is valid on Python 2 and 3 alike.
print(seq_score)
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
In [27]:
# Third model: same alphabet settings, but HMM-based scoring and three runs.
glam_3 = Glam2(
    alphabet='dna',
    gap_in_alphabet=True,
    scoring_criteria='hmm',
    alignment_runs=3,
)
# Fit on seq9.fa and report the matches for the same file. With 'hmm' scoring
# every motif reports exactly one 3-tuple per sequence in the output below.
matches = glam_3.fit_transform(fasta_file="seq9.fa", return_match=True)
# print(x) with one argument is valid on Python 2 and 3 alike.
for seq_matches in matches:
    print(seq_matches)
[[(74, 99, 1.6893966329306973)], [(75, 105, 0.99650017142605085)], [(29, 43, 1.1639833862098581)]]
[[(2, 27, 1.7596598975319826)], [(4, 34, 1.118784663702455)], [(31, 45, 1.0654232287919718)]]
[[(45, 70, 1.9259230545291977)], [(47, 77, 1.2194138851155221)], [(57, 71, 1.0172329430321891)]]
[[(2, 27, 1.7984720016686966)], [(1, 31, 1.0588929987283731)], [(63, 77, 1.0694554774868004)]]
[[(81, 106, 1.7157386883917156)], [(75, 105, 1.043898906347779)], [(89, 103, 1.08968183322277)]]
[[(5, 30, 1.6769265842550016)], [(5, 35, 0.97196493625763247)], [(88, 102, 1.0644643412628048)]]
[[(29, 54, 1.7516935261934661)], [(4, 34, 1.056391836400133)], [(7, 21, 1.0987052181415942)]]
[[(35, 60, 1.7715245185413044)], [(28, 58, 1.0339071386236649)], [(33, 47, 1.0433442102594948)]]
[[(5, 30, 1.6044114809851271)], [(1, 31, 0.93743793336510006)], [(79, 93, 1.0865433597952816)]]
In [ ]:
Content source: fabriziocosta/pyMotif
Similar notebooks: