In [37]:
import numpy as np
In [40]:
def Hamming_Distance(sequence_one, sequence_two):
    """Return the number of positions at which two sequences differ.

    The two inputs are compared position by position and every index whose
    elements are unequal contributes 1 to the result.

    If the inputs do not have the same length, no comparison is made: a
    message is printed and the function returns None.
    """
    # Guard clause: a Hamming distance is only computed for equal lengths.
    if len(sequence_one) != len(sequence_two):
        print("Sequences must be strings of the same length")
        return None

    # Walk both sequences in lockstep and tally the mismatching positions.
    return sum(
        1
        for left, right in zip(sequence_one, sequence_two)
        if left != right
    )
In [195]:
# Example: two equal-length DNA strings and the number of positions where they differ.
seq_one = "CAGAAAGGAAGGTCCCCATACACCGACGCACCAGTTTA"
seq_two = "CACGCCGTATGCATAAACGAGCCGCACGAACCAGAGAG"
print(
    Hamming_Distance(
        sequence_one=seq_one,
        sequence_two=seq_two,
    )
)
In [ ]:
In [185]:
def ApproximatePatternCount(Text, Pattern, d):
    """Count the windows of Text that are within d mismatches of Pattern.

    Every substring of Text with the same length as Pattern is compared to
    Pattern with Hamming_Distance; a window is counted when that distance is
    at most d. Overlapping windows are counted separately.

    Returns the number of qualifying windows, which is 0 when Pattern is
    longer than Text (there are no windows to examine).
    """
    k = len(Pattern)
    window_starts = range(len(Text) - k + 1)
    return sum(
        1
        for start in window_starts
        if Hamming_Distance(Pattern, Text[start:start + k]) <= d
    )
In [196]:
# Example: count the 3-mers of the text that differ from "TGT" in at most one position.
result = ApproximatePatternCount(
    Text="CGTGACAGTGTATGGGCATCTTT",
    Pattern="TGT",
    d=1,
)
print(result)
In [166]:
def ApproximatePatternMatching(Pattern, Text, d):
    """Find every position where Pattern occurs in Text with at most d mismatches.

    Each substring of Text with the same length as Pattern is compared to
    Pattern with Hamming_Distance, and the window's start index is kept when
    the distance is at most d.

    Note the argument order: Pattern comes first and Text second, which is
    the reverse of ApproximatePatternCount.

    Args:
        Pattern: the sequence to search for.
        Text: the sequence to search in.
        d: maximum number of mismatching positions allowed (inclusive).

    Returns:
        A one-dimensional NumPy integer array holding the 0-based start
        indices of the matching windows in increasing order. The array is
        empty when nothing matches or when Pattern is longer than Text.
    """
    k = len(Pattern)
    positions = [
        start
        for start in range(len(Text) - k + 1)
        # Inclusive bound: a window with exactly d mismatches is still an
        # approximate match, the same rule ApproximatePatternCount applies.
        if Hamming_Distance(Pattern, Text[start:start + k]) <= d
    ]
    # dtype=int keeps an empty result an integer array; without it NumPy
    # would build a float64 array from an empty list.
    return np.array(positions, dtype=int)
In [167]:
test = ApproximatePatternMatching("CGTGGATTTTA","GGGTGTAGCGACCTAAACGGATTGCCCGGCTCTCACTAATGATGACTACGTATTCCCATTATCTGGCCCTTACCCGACCATTGAGAGATGCAACATCGCTCGAAGGCGGAGTTGTTCTGGTTCGAACGGTAACGTACAATCAGAACAGATAGGACGCGTAGTACCAACTAGGCAAGCCCATAGGCGAGGATCTGATTAACCCGTTATGACATCACCATTAAGCTCCCCACCCATTGCTGCGGGCAAAGGCAGCTCCGGTCTTGAGGCTCCGCAGACTTGGGAAAGTGCCACCCTCGGGCGCACGAGATCGAATGAACAGTTAACGATGCCGGAACAAAAAATAGAGTGTCCAACGAGACTGCGGTGTGTCAACGAATCTGCGCCGATGTATCTCTCTGCATGATATTATAACCCGTACTACAGCCCTGTAAGCTTTTGTGTCTATCTCCCCGGACATTACCCTGCTCGGGCTCAACGTGAAAAAATATCGTCGTGACACCCCGCGGGACGCCTCAGACGAGGTCCTAACTCATCGCCGCGTGGAGGCAAGCCCTAGTTTGGTTAGCTGTTCTAGTGTTTCATTGTACCGCCACGCGACAATATGATCTTAGCGCTTGCTTCAACGGGATCCCGGCAGTCCGCACCTCCTCACCGTAGCTTTCTCCTCGAGTTGCGGTGCTAAAGTCTGTTCTTAGCCTTACGCGGCCTTTGTTAACACCACTGTAGTTTGTACTTCGATAGGCGCCTTCTTAGTGTTACGGCGCATTCGTCAGATACGTAAAACATCTCCGACGTAAGAAGTCCTTGCGCGACAGCGTTATATGCGAATTCATTCTGAGGAGAGCTGGATTCAACTTCCCGCCGGCGAGGACGGACAACGATGCTTCGTTATTCCCCGCGTACAGTATGTCTTCATGTACGGTTTAATCGCATAATGCACAACATTCACATCGCATCTTGAGCATCTCCTTCATTCATAGGGGGAGCCACCTGGAGAATGAGTAGAACTGGGTATAGTCAGAGAGCCCGCCAATCGATTTATTCTAGGTCATCCGAGTGCCGGGACTTGTGTTTAGGGGTCTGGAGTGGAGAATTTCCCTGCGTAACGGTCAATTCCCAAATAGGGTTGTCTTAGCATTGTTCCTCGTACCCACTGAAATACCTAACCTCGGGTGGCGCGGGAAGTGATCGTAGTGCTACGTCTATCAAACCAGTAGACACATAACCCTACGGCAGATGTAAATACGCAATGCAAATACGTTAACGCAGAGCTTACGAATGGTATAACCGTTACCCCAGAGATGGAACACAAGAGATTCCGAGATCGCGAAAACAACTGCCCGGGAAGAAATTACAGTTGTAGACCTCCTTAGGGCTATGCCTGGCCGCTTACGGTCTAAATAAGACGGACGTTCACTTGAGGCGGTTCCCCAAACCTCGCGTTTGGTGAGTTGGGCACTTAACGGATCTGACAACCGATAAAGACCCAGAGTCGGAGTCGCTACATACCATGGCGCAGGACTCCTTCCGAGACTCTAATCATGAGACTTTCCATAGTTGAATTAGCAGGACACACAACTTGCGGGTTACCGTACGGATAGAAATAACTCCTACGTACCGGTTTAATTTGTACCATTGCCGTCCAAACCTGCTACGTGACTTCTGAGTTCCATAGCGCAGTTACATAGTCCGAGGATAAGGCTGCAGCTGGCATCCTGTACACTGATTGGCCGCGCGCTGGAGGGTTTGGTTCTATGCCCTCGCTCTGATTGCGCCAACCAAAGGGGTGACAACTCTTGACAGAAGATTACTTACGATCTCCAACTTAAATGCTGAGGTCAGACCACCTTGGGCATGGTCTGGTCTCAGGAACTGTGAACTTGGCAACTGAAGGCCCAACGTCCCAGATTGGTCATTGTGGTCCTTGTCTCACTTACCAGAACAACCGAGC
AAGTGTGCGTATTTTGTCGCAAGCCAGCGGAGTTCACCGCACATCAAGCTCATTCATGACACGTCTAAAAGAATCATTTCACTCAACTGGGCCCTTCTGGCGGGGCTATGATATATTGGATTCGGGCAGACTAGCATGCGTTTGTAGGACCGGGGACCACGTACCCATCATAGCAGCCGTTCGACATCGATCAACGACCTACATAGCCCTCTGCTTACCCAGCCGTCCGTCCCGATTCTCTTAGCCTACATTGGCAAGGGAAGAAACAATCAGGTTAGGAGAACCGTTGTGAGAACTGTAATGTGTGCGGTTTAGAAGTCATGAAATTCAGCTCCGAGTCTTGAGCCAAAACCACGCACTAACCCGAAGGTCGATTCACTGTTATTCCTTCGGAGCTTGTCTCCAGATTGGCAGGTTGGACTTTAAGAATTACTGCTAAGTATATAGGAGGCCGAGAGCGAGGGATGCTCTTGCATACGTGACCTCTACAAGCTCTAAGGTTGAGATTAGACACAATGCGCCGTCAAACCAACGCTTGGCAGAACTGGTCTGAGGCACCAGTATCCAACACGCTTACAACTGGTTCTACTTTTAAGTTTGGTAGAAAGCTTGCGACGAACGTCAAGAGTGCTAGTTATCCGATCCTCTTGAGGGTAGCGTTTAGCGAACAGCAGCGGTTTCCCCTAAGTCCTGTGTGCCTATCCGCAGCGGAAGCAATACCGGGTGCCCCGCTACAATAACTAAACCGTTGGGTAGATAAAGGAGTAGTCGCTGTATAATGATCCCCTAGACAAAGAGATTAACGGACTATTACAAAGACGCCGCAAGACATGTTAAACCATAGAGACCTCAATCTTTGTCATCCCAAGCTTCGCCCGCGATTTACATGATCGCCAAGGTAGCTCCCTGACAATGCGCTATCGCATACGGCTCCAAGGGGTATGCTAGCTTGAACGTATTTATAGAATCATCGACGCACACGGATCTGGCGCGACGCACGTTTATAGTTACACTTGTGTGGGCCAAATCTTGTACCTCTGTAATTCCGTCACATATTGAGACGTGACCCAGCGCATAGCAGGAGCTGGTCATGGACTACAGCCGACACATGCGGGCTCTCCTACTCGGACGGGGTGGGAAGTTCGTCAACGCTTGACGTCATGCGAACATCACTTGGATACAAAGACATGAGTCGTGCTAACTCCAGACCCATGTGTGGTACTTATAGGACGACGAGAGACCTGAGCCGATCGTCTCGCGTCATCGCGTCCGCAGCGGCCTGCACAGCTATTCCCGCATAGTTGGGAAGGCATAACAGAAGTCGATCGGTGCGCCGTTTTAACCTATTGACGGCCGAATCTCTAAAACGAGGGCCTTCCCCTAGGCCGGAAGATCAATTTACCAGCGTCCTATAGTCATAGTGACTGTTGACAGCGGGACCTATTATGGGCCCCTTATGAATGAACTTCCCCAAAACAACCGCAAGACACACCATCATTGTCTGACCTATGCATGTGCAATCACTACAAATGACCTCTGTTGTCTGACTGGTTAGACTACCCCACGCGCACTGACGTGCCAACAGGAGGCCAGCTGAAGGCAGCAAACAATTTCCCGGTGTGCGAGGCCACGATAGAATTTCCTCGCTCCGGCCACTATGCTGGTGGTCGCAAATCAGAGCTGCATTTGGCCGGAGGTCACGCACACGCTATCCCGCTATCAGTTTGACGCTCGGATGGGACTACTCGTGCTACGCGTAGATTCATCTTTCGCGAATAGCGTCCACAGCGGAATAGTCAGATCAGTCAATAATGCTGAGCAAGGAACTAGATTACCGTACCGCGGACTACGCGATAGACGGCGAGGGTACTGGAAATTGTCTGATCATGCGCCAGCCCCCAATCACGCAGGAAAAGTCAACTCGTCGGAAAGGAGCGCCAAGTGAGTGTACTGGCACCCGATGTCCGATTTGGCTTGTTCTTCGCCGTCCCATCGAATGT
TCGAGAAGTCGTCAGGTTCAGTAGTGGGCAAAGCGCTTGCTAGCATATGCTGATGAAGACTGGATTGACGGGCTTGTGGCATCAGGCCTCGATGTCACAGTGCGGCCGAATAATGGTCCAGTTAGATTGCACAGAAGTACGCACCGCCAGGAGAGCTCTTTCGTTTCATTCCTGGCCGCCGAATTCGAGACCCGATTGGCTGCAATATCCCAGACTACCGCTTCAGCTGGGCTGCAAGCGATAGTGTACCTCGCGAGAGGACGTCAGCGTGACCGGGCGTGCCCATGTCTCGAAGCGCCAAACTTTGTATACCTCGCGCCCACCCGTGCGCAGAGCGCATACGGGTGGAGGGTCAGCCATAATTAACAATCTCGCCACACTATGCAACCAGGCATCGGAACTCTGGATGCAACCGTCGGCGAATTGTAGCATTGAGCCCAAGTCGCATGTCACAGACCTTTTTGGCTGGGTTATACCCCTGGATCGGAAGGAGGTTTCTCCGATATGATCTACTACAGGACTTTGTACATAAGGGGTTTTTCTGAGCCGGGCTTGGATTGCCGACAGGGCATGCAACTTAAAGTAGTAATGGTTACATTCGGCTCCTGAGTTCATACTCCGTTGCACGCAGGAGATGATGAGATGTGAATATTCCGGTTGACTCAGTCCGCATTCACGTCCTCCGATGTCTCTACATGAGAAAGCATCCCGCGAAAATTCTGTACCTGGGGACTTGGACGGGATCCTAACAGCACGTCGCCCGTCCGATAATGGCGTGTCCTAATAGCCCCCACATTAATCTGTAGGATTTTGAGCCCTCGGCCTCTTAAGAGGCCACTATTCCGACGATCTGCTCAGTTAATAGAGGCCGAAGCTCAGCGAAAGAAACGCGGACGATCGCGGGGAGAGGATACCGAGGCACATGTGTGTATGTAGCGGTGAGGATCGCACACGGGAAGGGGTGTTAAAGAGGACGTATCGTCGGATCATGCCTCAACACCAAAAAGGCACACTCCCTCACCCCGGTACTGGTAGCGTCATACAGTGATGGCTGCTAGCTCGTGGACTACGGTGAAACATCGCATGGACTGCAAATACCGCGTGTGCCCTCTTCAGCCGAGGACACTGCTTGTAGGGATAAAGCGGGAGTAGTTTGAGGGTAAGTATGCCGATATCATGCATGACCAAGAAGAGACGTACGAGCACTGCTATCAGATTTAAGTCTGCGAGATCTGAAGACGAAGCCACGATTATGGTCTGGGGGGCTTCATCCGAGTGAGTCTTAGCAATAACGCACACCACGGATGTCTGGTCTCGCCCCTGTTAGGGAGCCCGCTAGGCGGTCCATTTATCAGCATCTTTCGAGCGTTCGCGTGTTGCTTATGGGGGGCGGTCTCATGTGGGGCTCGGTTCTGAAGCTCTCGGTTGGGTTTAGGACCCCCGAGGCCGTCCTGTCGGTCCCCTACGCAAGAACCCGTGCCGGCACACCTGGTAATGTCCGCGGCAGCGACTGTTCTTGGGTATCTCCACCGTCAGACCAAAGGAACCGGCGATGAAAGCGGTTCCTCGGGGATTCCCCTCAGTTCCTTTTTCGCGGCTCGCCAACACCATGCTGGTGCACTATCAAAGAAAGCCACAGTTTCCGAATGAGATCTGATGGGTCGACCAGTCACCAAGAGACCTATAACCGAGATCTTCGCCCCGCTGATCCAGGTCCAATCTCCACGTAAGGACATGCATTTTCCAAGAGCTTTCCTTACTGCAAGCTCCCGCGCGAAGTGGAATGGATCTACGCAGCAGCAACGGGCGCTGCTCCGTGAATAATCGATCAGTTGTGTGGTCTTCCGCAAGATTTCTCTCTCCACACTGTGATCAGTCGGTGTAATCCGGATTAGTCTTGGAGTACGCTCGTCAAACACTTGGGCATAGTCCGCTTCAGATGAGTTGCGTTTTCCCTTTTAGGCCTAAAACGAAAGGCTCTAGGCTAGAAACTGGTCGCG
TCCATTGGCCGCCGTGATCATTTGGATGCCCTTGTGTGCACGATAGCTGGCAGGACCTAATCCTATCCACTGACGGATCTATAATGCTCCAGCATAAAAAACCTGTCGCCGAGTCGGTAGTAAGGTAAACCATAGAGCCTGCTAGTTCTGACCTACGCTTTCCATAACTCGAGTGCCTTATCTTAAAATCTCAGGGCTACACTGATGTTTCACAGCCAGTCGCAATCCTCGGCTTTCAGCATAGCCCTGTTACTATTTCGGTATCGGTCGAAAAGGTTGATTGACAGATGAGCCGTCGGGTACGAACCATGCCAACTAGCGGACCTAACACGACGTTAGCATGAGCTGTACGTTCTTCGTCGGGCGGAGGTGCCGCGCGGATGTCCGGGAATGTGGCGGAGTGACTCTAGCATCTCATACGACCTGATCAACCACACATTTGGCAATCCCGAATTCGCTCTTCGCCGGCCAATATGAGCCATACGTTAAGAAGCTGCGCTCTCATCTCACTAAAATGTGATAACCCGTGTACGTCCGGTATCTACTAGATATGTATGGGATAGTTTCCTTGTATAGGTCCCCTCTTTCTAATTCTGAGCCATCGCCGCAGTTTGAGTATGACGATGAAGTTGTTTGAATTAGGGAACTTTTCCCCACTGTAGCTATTGTTACGTTATCTACGCTCATCTCACTTGCTGCCAGAGCCCCTGGGATGAAGTCGAGTACAACGACTGGAAGGTTGCTCCCCATCGGACCACGCATTACGCTACGATGTCTTCGTCTATGCCGGTTCACTGGTTAACGACCCGCTCAGACGACGAAAAGCTCAGTCATTAAACGTCAGAGGATCCACATTCCGTCTCCAACTGAATTCGGTGGAAGTACTGCGCCCCTGCTACGGTTCCGTTGGCATGGTCGAGTATCCCAGGGCCGTTTACCACGTATGCGAGGCTTCTCTGAGACGAGTCCTACCGTTCTGTTGTCGTCGAGGTTAGACGTAGAACGACCTTCGTGGGGTCCATCTCGGTTGGCTTGCTCCGAGGTCGTACATCCGGCCGACCGGATACCATACTAATCCCCATGGGGGGAGCCTTTTCGGTTGTTATGCAATCGGCTTACAGCTCCAAGAACCGGCACCTATCATGGCTAGTTTATCCGGACTCCGCTGGCAATTGCACGCAAGCTTTCTCGAATAAGTCAGTGATGATTGCAGCACCCCCCTACAGGTGGAATAGCCGGAACTGGCCTGGTTTCGGAACGGATTGACATACCCCTGCGTTTTGCCCTCCCATGGTCCGCAATAGCCACCCAGACTCGGACCATAGTAGTATTTTATTCACTGCAACTGCGGGCCTGACGTCGAGTCTAAAGATGTTCGCACCGATCCCACGTCCGGAGTGATAAGCCTTGTATAGTGACAGGTGTTAAATGCGACAATCGGTTTTGTTGCGTGCTAGACTATTCAGTGTATCTTGAAATGTTGGGAATACGCTCCGTGGGGTTAACTAGCATGTCAGGTAGTAAGGTTTTAGTGAGGAAAGAAGATCTGAACGTTACTTCAAGTTGTTGTCCTTCCTATTACTCATCGGGTATGGGATTATAGGCCCTATTTAATACGTCCTGCATTGAGTCCATGTATCCTTTTTTAAACTTATCCGCAACCCCCTACCCCTTACTGTTCTGCAACGAGACTATGGCCCCCGGAAGTGAAAGAACACTGCGAGGGCTGTACCAATATAGAGGAAAGACTCAAGGAACACGACCTAAATCCCTCCCGATTAATCGATGCGCAAGTCAGGCCATAGCCCCTCTGCGCGTGTCGAATTAGGGGTGTATGGTTGAGTCCAGATAGTAAAGGTGGGTTCCACAACCATGCGAGCCCGGCCCTTAATAAGGCCCGCACGCGATCACACTAATACCCAGCAAAAGACCCAAGATTTGGGCAGCATGCGGAAACGGGGTGACTCTGTGGTGCCTGACTCGTGACCCCCCCTCAAAGC
CGCATAGAGACTCAGAATACCACTGCACTGACGTTAACCATTTTAAGTTCTACAGGATGGGCCCTGCTCTCAGGTAAATTAGCGATTGTAATGCATACGATATCCCACGGATCGCAGGATATCAAGAGTCAATCAGCCGCCATAATGTACGCCGGATAGCTGCAGTACAGGCGCCGGGTAAAGGGGGGCACGGGACAGAGTCGGTAGAATGTCACAACATGCTTCCGAGACTGTATGGTGAGATGTGCAGGCCAGAAAAGGTCTTATATCGGAGCGTGCTAGTACTCGTTCAGTCGCATCGTGAAAGAACGGCATTCAGTGAGCCCCCTACTTACTCGGGAAGTAGGTCTTAGGGGACTCGGTTGACATATCAACCTCGCTGACGAATTTGGCCTGTAGTGCTACAAACGCAGGTCTGCCGCATAGCGTTAGCAGCTCTTAGAATAAGCAAAGGTTGCACGTGCGACCTCCCCACGTTACGTAAACATGCTTCTCTTGCATGGTTTCTAAACTGCACGGCCGCGCGGCGTGACATTGCTAACAATTGCAACAACATGCTATTTTTGTGCTTCTGTGACGCACATTTCCTGATAATGCTCAACTCTAAGAATCGTACTTGAACGTATTGGACTAACTGCTGTGTTCCTCTGAGGCGTATGGTCTTGCTCTTTCTTACACACATTATACTAGAGGAATTCACCAGCGTTACTACCAAGAGACCCATAAGTAGGATAAGCTCAATTTGACTTATAGTCGTAGCCCTTTTTGCCGACCCTTATCGGTACCATCCCCGCGGTTCGTTGTTCGGTCCAAAGACCCCTTGGCGTTGGGCCTTGGCCTCTTCTGTGATGTCCGCGAATATCCTATTAATCTAGCAGTGGTTTTTGCAATCACAAACATGCAAGTAGCCAAGTACAAGATGCTATTGGACAGCCAAATTCTACGCGAAGCACGAGAAGCTCACTGTAAATTCGGAGATTCGTTGCGGAGTACGGTGACGCACTTCGCTGTGTCAGGGGGGTTCTTGATATGTGATTAACCAGCAGTCCTCGTAGGCTCGTCGCCGCGCCTTACCTTGCCCTATAGGCGACGCGCCATACCAGTAGTTCCTTATGTACCTAGATTACGGTATTAGCATCGCCCGATTCAACAGCTACGAGTTTGTATGATAGCGGGGATAAATCTACGTTGGATTACGGTGCTCCAATAATTGAGCTGGAGCTGGCTGACCGGGGTGATAAGAGTCGGAGCTGACTTAGGCTGCTATCCCCTCTTCCTTATTATAATGGCTGTGACAAGGTGGTTCGCGGCACCCACAGCTTCAGTTTAACTAGCGGCACTACCGGTGACGATTCACTCCGCGTCATGTGGCCAGGCGCCGGATCAAGAATAACGCATGCGATAAGCTATCACGAAAGTTTAAAACGATTTGCGCTACCACTTCAAGGGAAGATCGTATGGCATCAAGCCAGAAACAACCCAAGCAGTAGAGCAAATAACCGTACAGGTCAGGCTACCTACGATGGCTAAGTAACTCATTATAGTACAACCATGAACCCACTAGGTACACACGTTTTCCCACCGTTCACAAGTAAAGGAAGAAAGAATGTGGGCTAGCTATACGTGATAATCGGGGGCGTATCCTGTGGTGATGTCAAAGAGTGCTTTGACAGGCGTAACTATGGCGTGCCTTTACATGACTCTGTAATCACGTTCATATCCCATGTCTTAACCCATTTTTAGACGCTCTGCGCCTAAACGGGGAAATAACAACTGCAGGTATTAGACAACGGAGGTTGAATCGGAATCACAGAGGTAGGACCGGGTTTGTATACAAGGCTATCGTTGCGAGGAATAAGCAATGACGGAGGATGGTAACATATCCTGTCCTCCCCGAAATCCGACCTACGAATGAGAGATGTCGCTCTTGCAATTAGCGAATCAGTCTAAAACCCGATTCGTTAACGCGCATTTACACTTTTCTTCAAGAGAGAAGACTT
GAACAGGCCATGCTAGTGCCTGAGAGATTCCGAGAACAGGCAGAGATGGTCGATCCCCGTTTAGCTGTGTGCCCACGTACGTAGGGATATAATTGCGGGGACCGCCCTGATATGCTCTGCGACCGTACCATGACGGGGAATATGAACCCACGACCTATTGCCGAGAAGTAACGCCTTCGCCCCGTAGGCATTCTGTAACATAGTGCAGCGCGGCTGGTCAGAATACTCCCCGGCTTACTTTGGTTTAGGAAATAGATCAACTTTTCGAGAGTTCGGGATAACCCAACATATACAGCAAAAGTTCCATGCTGCCAATCGTTGAAAGGACTAACTTGTGAAAAGCTATATTCGTAGAAGTGTTGTGCATGCTACAGGCGCGGTAAATATTGTTCGATTCATGCCAGAGGAAGGCTCAACCGACATCTCATGGTTCAGTTCATACGTGAAACTACTCAAGGATAAGCGCTTTGAGAGTGCCCAGACATTTAATCGACTAAGCACTATTAACAGCCCCAACTCTCAGGCGCCGCGCGCGGATGCATACACCCCATCCAGCAAGGTACTGCCTGATAGTATGAAATTTGGTGACTGTTTCAATCGTGAGACCGCGCATCATCGTGACCGTTGAACAAGGAAATACAGGCGCTATTCGTTCCTGCTAGTTCCCCTTCGGGGTTACCTACGACCGTTCTCATCCTCCGATCCTATAAACGGTCTGAGGAAGCAGTAAGTGAGGTCAACCAACTGCGTTATAGAACAGGTTATTCGACGGTACATCTTCTAGTCTTTTTAGGTGACATCTATACGCCTGCCAGGGCTAGCGCGTGATCTCTATCGAAGTGTTGAATCTCGAATGGTGATACTCATAGCTCGACTAACTTACCGTCTTCTGCAATTCTCTGTGGGTCAAACTATTCCCGCTTGCATCCTTGCGAGGCCCTAGCGATGTCGGAGATCCCACCTCTTAGTTCCAACCAAGTCGCCTAATCGGCAGACTAGGCCCCTGTGGGCGGCGAGCAGTGTTAATCTTTGCCCATTGGCCTTCTGCAGCACATGGTATACGAGTCTAGGCATGCTACCCTTGTCTACAGCGAGGGAGACAGAGCTCGAGGCATTATAAGAGTCCGGTCTCAATTGATTTACGTGCATACGTCGACATCTATAAGCGTGAACCGTACCTCTAACGTGTTGCCCACAAGAATTTAAAACCATATTTCAGAGACATCTATTAGAGCCATAGATTGGCCCGCGCAACCTGGTAGCGCAGCCTTTTTGTAAATTCTACCGGATGGCCCATAGCTTCGTAGAAGATCAGATACTGAGTCCGCATAATCTGTAGCGCTATGAGGGAAGGGGGAACACATTCGGATGGTACCAGTGAGCCCGAACATGTAACATAGGGTTATATTTCACGGAATAAACGGGGACCGGAATGGACGTAGACCTGAGTACGATTTCTCGCGAATTGTGTCGAGCACCTTCGAACCGCTGGATCTGCTGGTGTATATGTGTCGCGCATTGCGGGAAGAGGTCTCATCAAAGGAATCAGGAAAGAGCAATCGCTACCTTATACTGAGAGCCCTTAGCTCCTCATCTCCTTCAGGCCAGTCGACTACCCAGTGCGAGAACAGTGGACCCTGATAACAGGGACCCATAATAAGGGCCAGATTATTGTGGATCGACCGACTCCGATTGCGCGCGGAGCGACGGCTACGTGACTCAGAGCTGAATTTGTAAGAAACCTCACGGGGGGACCAAGGTACGTTAGATCACCGGCCGCTTGCTGACGACACCTAAATGTAACGTTTTTAACCGCACAAGAAAGATGGAGTCAATAATCGGTGTCACCTTAGATGCGCGCTTTTCCTTAGATACAGAGTTCCTCTTACGTAACCCTCTTGGCCGTCAAACTGCAATATCAGTAGTAGAGGCTTCTAGAGAAGAACCACCTGCCCAACCATCAAACTATAAAACGTTACTTGTATAGAGCTTCGTCCAGC
GACCAACGGGAAATACTCTACTTGGCCGATTCTCGAGCGAAATGTTGCGATTGTATCGAGTTCTACGAAAATAAGTTCTTCCAATGTGGCTCGATGACGGTAATGAAGCGCCACTAACAGACCATTGGAACCCGGATAGCGAGCGGACCTTTCCGGTCCAATGCTCACTGTGCTGAGACCACAACAATCCCTTGAGTGACGCTGCTGCGTCTTGTTCGGGCATATTGGTCACCTGATCCTGTATTGTTAAGTAAAATTACATCATGGTACCTGACCATTTTGTGGCCCACTCACCACTATCCGTAATTGTACATGTAAACCAAGCACGCAGGGGCAGTGCAGAGCATGCTATCAACTCCCTTTAGAGAGGTGGAGCGAACAAATTGATTGCGCATTGTATGTTTCTCATCCCGGGTGAAGTCGCCTCAAAAAAGGATACATGGCTACTGTCTCGGTTCGCGGACCTCTAGATCGCTTAGGATGTCAATTATTCCGCACGATCGTGCTGTCAAGCGTAACGCAGATGGTCGTGAACAAGGTGAACGAGTATGAGAGAGAATGGCCTAACCACAAGACGTAATACGGACATATGCAGCCGTCAACGTTCCTCTTGGAATGAAGGAATTTTCTCTGGGCAGCCCGGCTTTCGGTTCGAAATCGTCTCGATATATTTCGGGCACGAACACCGTGAGTCCGCCGACATATGAATGACGACAGCATGAATGTTTGGTGAACGTATAGCGACCAGTCGTTAGCGAGAAGAACAAGAGCACATCGAATTATGCACGACGGGCCTTGTCCACAGTTCGAAACTCTCTTTAGATCCGTAGTTAGATCCATTTCGGGGTCGCAGTCAAGCCCGTTATCTCTAATCCATGTCTCGGTCCGGCTGGGATATACTATATGTATTCATAGTGCAGACTAATAACAATAGTGTTCGCCAAGCAGTTCAACATCCTTAGGGTGCCCACATTCGCGCCTTTAACAACCGCAATGCATGTCCAGAGTGCATGTGGTCAGCTCGGCCACGCTTGGCGGTGGACATCCTTATTCTTAGGGTCCCTAATCAGATAATCGAACAATTACTTTGGCGAGTGTGGTACGCTTTGTTCAGCCATGTGTCCTGTTATCTGCCGATCGCAATCATCGATTTCTAACCCTATCCTATCGATTACAGCTGAAGCTAAATCCGCTTGTCAACCGAATACTGTGGTCATCACTCAAGGCAACGACCCACCTGTAGGTGTCCAATTGGTAACGGCAAGCTACTTTTAATATAAAGCCCAGACACCCGCGCTCTTGTAGCCACTTAGACAGCGAAGCTCTCCCGCGACGTATTATCATTGTCATTGGCCATCTGATATCATTAAGCGGCTCAGGTCTTGACCGCAACAACGGTCTGTAACACGAACTTCAACTAATTTCCAATGAGGACCAGCCTAGCAAGTGCCCCGAATTTTCTTTGGTGGGGCTTGTGAGCTTGAGGGATCTATCGTTCGTACTATGAAGCCAGTTGTTTCAATGGCTATGGCACTGAGCGTCATGTGACCTTCCTAAAGGGGGTTTTGCTAACAACTCGTCTCATTCGAACTTAACGTGTTTAAGAGATCGCCATGCCCCTGGTGCCGTTACAGCCCGGGACTGTATGACACGCCACCCAACCAGATAAGTTTCTACATCACTATAAACCAATCCATCCTCCATAGCCCCTCCATTACTTTTAGGATTTCTTGCACCCAGGCGCAAAGGTCTAGACGGCGCGTCTGCGATTACTCGTGTTCGAAGACGCGTGCTCAGAGTTCCACCCCGGACTTGTTCTTGCAGGTGCTATAAGGAAGATTGGTATACGTTGGACCTTCGCGTGGACGTGCATGGTACCCCCCCTAGCTGTAAGGGCTGGCGACACGTGCGACAACTGGGCTCGGATACACGGTGCGGATGTCCTTAGTACGGTCCAACCAAGATCAGAGGCCTAACCTGCTTGTGGTAATGGAACGGCG
GCACTTAGAGTGGACAGGCGTCATTAGGTGTACCTGTCTGCACTTAATAGTCGAACTGCACTCATGGCGCATCCTGTCGGATAGAGCCTACAAAGCGTATTCCAACAGGACCAAGCCGTGTGCAAGAAGATCTCTGAATTGACTACTGTATTATGACTTCGCTAAAGGCAGCCTATTTGAAACGACGAGCTCGCGAGTGGTACTGAAGCTGCACCGGATGTGCCTAGCAAGTCTTAAGACAGGAAACCGACGTGCCCACTCCTACGGCATCTAGCTTCTTATCTCATAGAACTATAGTACGAAGCAGAAATATTATTGAGGATGTGCCAAAACGACGGCCGCTGACGACAATACTTGGCGCCTCACAACTCAGAGAACGGCCGCAAGAGCACGGCCACCGCTTATTGCTCACGGATACAGATGAAGAGCCGGTCCACAATCAGCACAGAGGGCGAGTGGCAGCCAGGCTATTGTCCGCAATGCACCATGTATACGAAGATGTTACAATTGCCCTGCGTCTCACACAGAGTTAAGTGAGCAAAGTGCTGACGCCAAACACAACTGCCGCATTTACTCTGATTGGTACAGAACAGTGTATAGCACTATGCCGCTCCCTGCGACGACAACTGCTGTAAAAGCAGATAGACGGTTTCTGATGACGAGGTAGAAGAAAAATGCTCCCCCTAACTGATACTTGCGACGTCTGGCCCTCAATAATGTTGCCCACCAGCGAGTAATGTGGACGGGTAAAGCATCCGACCAGGTGTCATGAAGCCGAAAGTGATATCTCGTCAAGCTATACCCCAGACTGCCCGTTCTCTTATGCACTTATGTACTGCATCTGTCACCAGGTAAGTGAATCCCCATCCGTTAGCTGATAGCCTTAGCGTGGAACAGCCCTCACACATTTGGCCTTGCCGAATCGAATACTATGTATTCGAGGCACTCAAAAGGCCAGGTTCTCACATTCAGTGGGACCGACGTTTATTGCCCTATTATGGTACGTATTCCCTCACCGGCAATCGGTTGTATATAGATTTGGTACCAGATGCACCTGCCCCTCACGAATTTGGAATTGGGTCCATCAAATTTTGCACTCACCTTTTAATTACTTCGTGAAATCCCATCGGCCAATCAAACCGTCCTTAGGCCGTCATCCCGAGCCGAATAGAGCAGAGCTAATGTGACAATTACCGGTTTGAATCCCTACGTCAGCGCTCGCGGGAGAAAGATTTAACCTTTACTGCCAACTCTGGAGTCTTAAACATACGCGATTTTCGCCAGGGCTGTGTCTGCCGACTAGAAACAGCGGCTTAGGCTGTTTGCTGCAGATGGGCCAGTAAACATATTCCTATCCGTAAGCCACTTAGCCATGATCTTTTAGCCGCTCCAGCTCGATCGTTCTAAAGCGGCCGTTCGGAAAAGCTACCAAGACGTGTAGTCAAGCCTGAATCTATGCGCTCGCTTAGAACACACGGCCGGACATCCGCTCATAAGCCAACCCTCGAGTCACTTCAAAAGAGGACCGTTGTCGACGTCATGTTCCCTACCTGCCTATTGATCAACGTAAGCATGTTAGTACGAAAAGGTTAGACACTTGGAGTTTTAGTGGTGCATGGAAGGTATGGAAGATGCTTGTTTTGCGTACCTATAGCGGCTTCCGGGCCTCCTGAATGACGTATTAAGCGTGGCACTACCATTTGTACAAAGTCCCTATAAGGCCTGTGGCTGCGTTCACAGCTCATTCGGGTCTCCTAGGGGCAGCGCCGCTTCGGGCAAATTGATCAAATGACCCAGTGCGCTATCAGTGTACCTCGCCATCCGCTGTTTCTGCATATGTAGGGAGGATGCGGAGCCATGGGAGCACTACTGCCAGCTGCCGACCCCGACGTTTAGGTGCCTAGGGGGGCGAAGTAGGTATCATTTGGGGTATAACACACCGAAGCGAGCACGGGAAATCACCTTTGAACTCAATTTCCTACTGTGGTCATTGATGTCGC
CTGCTATCCTTAACGCACCTTTAAACCGGGACCCTGCAAGTTTATGGCTGCCGTGGTATCAGGCCCGCTAACTGTATCAATGTTAAGCCCTCCCAACATGGATCCTTTAGTTATTAAGGACAGGTTCAAAGCACCATTGCGCAACGACTGGTTGCCTCAGCCTTCTGCCCTTCAGTAGCGCAAGTGATCTGTAGGCACGGTGGTCAGTACTTAGACAGTGAGGTACTATCGACTCCTAAATGTAAGAAACTTTATTGGTTGCGCCAAGATTCGATGCAATTTGCATCTTATCTTTCCAGAGTGTAAAGTTACTCAATAGGTGAAGGGGTCTTCTGCGGTCGGTCCCCCAGCGGGAGGAAGCTAAGTTACGGAGATTGCGATCCTAATCCGCAAGCGGTGATAGGGCCCGCCTTTGGGTGTAACTCCTTTTATCGTATATAAACAGCACTGTGTTTTAATATTACACGCAACCTTTGGATAACAGCCGTTGTAACGGAACATCCGGACGCTTCGACGCGGGTCGATAAGTTTTGAGACTCAAGCTGCTAAGGCTTCCAATAAGCAAGAGAAGCGGTTTGTACAATCGTCAAGATGGCAAATACGGTTTGACCTGCGGACGCGCCAGCTGTGGTGTTCTCTATGCAGAGAGTCAATGGTCACATCCTACAGGTTAAGCAATCCCGCCTTATTACCCCACCGTACGTTCTTTTCGACTAACTGGAAACAAGACCGACAATACAGCACCAAGGACACCGTACACAGGAGACCTGAGTCAAAACGCGGATTCCACTCGAAAAGTTAGATAATAGGTTCGAACTCTATTAGGGGCTCGATGTACACAACGAGGTGCTAGACACTACCACGGAAATTCCAAGAATACTTATCGCTCTGCCCAACAGAAGGTAGAACCTTCTTATAGTCTTGGTGAATAGATGTCCGCCTGCTATACCTATCCCCGTATGCAGCTTCTCAGTATACTCGTGACTGATAAGAAGGCTAAAATAGTACCATGCGATTGGACACCCACCAGCGGGACCCGAGTGGTCCAAGGTCCACGGTTGGATTTCTCGTCGCGAGCATCTATCGTCACGAGTGAAAGACGCGTCTCTACTTTGTGGGCTGATTCAGTTTGGAAGCCTGCGGACTAATCCACATACAGTAGTCGAGGGACATGGAGCACCGTAGTGGACTGCGCCTTGGGGTATTTCTCAGATGATTTGCCCGGAGTACGGGGCTTTAGGGATAGCCGAGTTGAAGACTCACCAATTTTGCTCTTACCCGAGAAAATATGCTAGCCTAGAGCACGTCTCATTAAATAGTATCGACTGTGCCGGTTCCGGGCTCACAATCACGTAGTACTGCGGGTAGTATTTCTACTCTCCACACCTTTGAGATATTCTGTGATCTGGGGCTTATCAAGGACCTTTTAGTCGATGAGGTCTATTGCGGACGAGTTGGCCTCACACACGCATTCAAGTGCCTGTTAGAGCAACTGAATTCAGTAAGCGCTCATACTGCCTGCTTTCTCAAATCCGTTTACGCGGCGGCAAACTAGTTTCCAGGCTTAGCGTATACGGTGAACTAGAAATGGTCTCGAACTGCAACACTTGTCTCACATCGAAACAAGATCGTTTCAGATATTACCTTTATGGCTTTGACTCCCTGTTTTCCACCATAGAGCTGTTAATTCGTAATCCGCGCCCACATTAATCCCACACTACATTATACAACTGCTGCACATTCACGATATTATGTGTATAGTGGCAGCTTAACGCTCCATACAAAAGTCGCATTATCATTAGGAATTACATGCTTTGAAAAAAGGTAATCGTTACCACATGGCAGTAATTTCGTCCGTTTCGGGACGCCACTGAACGCGGCTAAATTCACTCAAAGCCGCACGTGTTCCCAGCGAACTATACTACGTGTAGATTGACGCTAGAACACAAAGCTAGAGAGACATCGAAGCTCTGGGTACCAGTGAACTTACAGGGCCCCCC
GTGAGGGTCCGGGATCAAACTAATCGCCACCCTTGAGGCGTCTGTATTTTCTGGCCTCGCACTGACTTTGCCTGGGTCTTCTATACGACGGCCAAAACCTCGAGACAGAAATTGTTCTTCATTCCCGAGTACATGTCAGTACGTGGACTACAAATCGTCCTAAGGAGACATATTTTAAGTCATAGTGTACTGTATCTACGGCTAAACAACCTGCATGCTGTCTAGACGGCTAATAAAGTGCGCTCAAAAAATCTACAATGATAGTAGTCTACTAGGAACAAGTGCGCCATTTTCCTAATGACAACGCACGTTTACTCATTTGTCCGTTCGTTGAGTCACATAATGCGGCATAGCATTTCGATAAGATATCTACAGATCCCAGGAAATGGCGTCTGAAGCAGGCAACTCTCACCACGACGACGCGTCCGATTCTCCCGGCCAATAACTAAAGAAATACTGGCGCTTAGTGCTTGGATGATCTAAATGTACCTCTCTTTAACGGCTCTTAGGAAGCGACATGTGCGACCCTCCGCAGAACATTTGGTCCACTTGACAACCGGACGTAAGGCTATAAGGCAGTCCGCAACGACCGCGGACAATTAGGTGAAATGGTGTCACGAGCAGTAACTGAGCCACTTGTGTTGCGATTAGAATTATTCGAGGAGTCGCCCTAGTAACAGTCTGTGAACGCGACCACGCCCGTTGGTTCAATCTCGTTGTATCAACTTGGGTGAAGTATGAAAAATGATCTCCGTCCGGATCCTAGGGCATTCTGCACGGAGAGGATATTTGTGTATCCGCCTAACGGCGGAAGCCCTCGACTAGCTGCCTGACTGCGCGCCGTGCGGACCAAGGGGGGCATCACGTCTATTAGACTTAACAGTATATGCCTTAATAAGATGGAGTAGTTCTTGGCAAGGTAAATTTCATATACTGATACGGACAGTGGTATCCCCAAAGTGTGCCCGAATCGTCCATGTGTAACGAGCTCGTTGCAGTGACCCAGCGAATGTGGTCGGTTAAGGGGTAAGTGTGGAGGCTATAAGTCCCACAGGATGCATATCATTGTACTTGCTGTTTACTACCTCCTATGTACGCGAGACAATGACCCCTCCGTACAAGCTAGGTGACATGGAGGACCAGGATACGCCGATCTCCGCGGAAGATTGCTCGCATGGCACTGTCTAATCTAGTTAAGATGTGCGAGCCCAATTAATATGGACCAAGTACTAAGTTTATATGACGAATAACAAGGCAATGGTTCAGCCAGTATGCGCAGACAATAATAAGGATAGGTATCTACTTTCTTGTTAACCTAGGATAGACGCAAAGACGACGGCTTGTATGACCCTGCTACGTGACAGGTAACCGCGCTAACATGAGATGCGCGTGACCGAGACCGTCTAGAACTAAAAGCAGAGGTTGCTCTGGACCGCTCCACTGTATTTGGGCTACGGATTCAGGACTTGGTAAAACTTACACTTGTAGCTGATAGCAGGCATCTGATCACGGAGCCCTATCGGATTTGATTATACATTTCCACCCTGTACGCTTTTGTCCGAACTCAAGCTAAAATAGAAGAGTCCAACCCCCCTACGAATCTCCTAACTTCGGATACGGGAGGCTAGTTTGACACGATATCAGACGGAGAACAGCCATTACATGAGGACACGCGGCATTACCCGATTCACGCGTGGGGACGTGGATTTTA",4)
# Save the positions held in `test` as a single line of text, each entry
# followed by one space (so the line ends with a trailing space).
with open('/home/lgutierrezfunderburk/Documents/approx.txt',"w") as myfile:
    myfile.write("".join("%s " % number for number in test))
In [146]:
# Example: count the 2-mers of the text that differ from "AA" in at most one position.
res = ApproximatePatternCount(
    Text="TACGCATTACAAAGCACA",
    Pattern="AA",
    d=1,
)
print(res)

# Example: Hamming distance between two DNA strings.
res_2 = Hamming_Distance(
    sequence_one="TGACCCGTTATGCTCGAGTTCGGTCAGAGCGTCATTGCGAGTAGTCGTTTGCTTTCTCAAACTCC",
    sequence_two="GAGCGATTAAGCGTGACAGCCCCAGGGAACCCACAAAACGTGATCGCAGTCCATCCGATCATACA",
)
print(res_2)
In [ ]: