In [4]:
def readBases(filename):
    """Read a file whose lines alternate between two sequences.

    Lines 1, 3, 5, ... are concatenated into the first sequence and
    lines 2, 4, 6, ... into the second. Trailing whitespace (including
    the newline) is stripped from every line before it is appended.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    list
        ``[first_sequence, second_sequence]`` as two strings.
    """
    odd_chunks = []   # pieces taken from lines 1, 3, 5, ...
    even_chunks = []  # pieces taken from lines 2, 4, 6, ...

    with open(filename, 'r') as handle:
        for index, raw_line in enumerate(handle):
            # index is 0-based, so an odd index is an even-numbered line
            bucket = even_chunks if index % 2 else odd_chunks
            bucket.append(raw_line.rstrip())

    return ["".join(odd_chunks), "".join(even_chunks)]

In [5]:
def compareBases(filename='bases.bin'):
    """Return the Hamming distance between the two sequences stored in a file.

    The file is parsed with ``readBases``, which returns the two sequences
    as a two-element list of strings.

    Parameters
    ----------
    filename : str, optional
        Path of the file to read. Defaults to ``'bases.bin'``, the path this
        function previously hard-coded, so a call with no arguments behaves
        exactly as before.

    Returns
    -------
    int
        Number of positions at which the two sequences differ, or ``-1``
        when the sequences do not have the same length.
    """
    first, second = readBases(filename)

    # The distance is only computed for equal-length sequences;
    # -1 is the sentinel callers receive otherwise.
    if len(first) != len(second):
        return -1

    # Each mismatching pair contributes True (== 1) to the total.
    return sum(a != b for a, b in zip(first, second))

In [6]:
# Compare the two sequences read from 'bases.bin'; the displayed value is the
# count of differing positions, or -1 if the sequences differ in length.
compareBases()


Out[6]:
486

In [ ]: