In [36]:
#2017-2-6
#DNA alignment
seq1 = "ATTTCTGCTAGCTGA"
seq2 = "ATTATGTAGACTGCTT"
n = 0
m = 0
seq1L = []
seq2L = []
bondL = []
seq1len = len(seq1)
seq2len = len(seq2)
print(seq1len, seq2len)


(15, 16)

In [56]:
#Under condition of one nucleotide mutation, equal sequence length
while True:
    if n < seq1len and n < seq2len:
        nuc1 = seq1[n]
        nuc2 = seq2[n]
        #when identical
        if nuc1 == nuc2:
            seq1L.append(nuc1)
            seq2L.append(nuc2)
            bondL.append("|")
            n = n + 1
        #when SNP
        elif nuc1 != nuc2 and seq1[n+1] == seq2[n+1]:
            seq1L.append(nuc1)
            seq2L.append(nuc2)
            bondL.append(" ")
            n = n + 1
        else:
            break

    else:
        break

In [37]:
#Under condition of one mutation or indel, length between sequecnes may not be equal
#Note: this verstion assumes no SNP or Indel occurs at the first nucleotide
#claim pair, SNP and indel stats variables
NoMatch = 0
NoSNP = 0
NoINDEL = 0
while True:
    if n < seq1len and m < seq2len:
        nuc1 = seq1[n]
        nuc2 = seq2[m]
        #when identical
        if nuc1 == nuc2:
            seq1L.append(nuc1)
            seq2L.append(nuc2)
            bondL.append("|")
            n = n + 1
            m = m + 1
            NoMatch = NoMatch + 1          
        #when SNP, occurs at the end
        elif nuc1 != nuc2 and n+1 == seq1len or m+1 == seq2len:
            seq1L.append(nuc1)
            seq2L.append(nuc2)
            bondL.append(" ")
            n = n + 1
            m = m + 1
            NoSNP = NoSNP + 1
        #when SNP, occurs before end
        elif nuc1 != nuc2 and seq1[n+1] == seq2[m+1]:
            seq1L.append(nuc1)
            seq2L.append(nuc2)
            bondL.append(" ")
            n = n + 1
            m = m + 1
            NoSNP = NoSNP + 1
        #when insertion in seq2
        elif nuc1 != nuc2 and nuc1 == seq2[m+1]:
            seq1L.append("-")
            seq2L.append(nuc2)
            bondL.append(" ")
            m = m + 1
            NoINDEL = NoINDEL + 1
        #when deletion in seq2
        elif nuc1 != nuc2 and seq1[n+1] == nuc2:
            seq1L.append(nuc1)
            seq2L.append("-")
            bondL.append(" ")
            n = n + 1
            NoINDEL = NoINDEL + 1
        else:
            break
    else: #when at least one of  the seq ends
        #when seq2 ends but not seq1
        if n < seq1len:
            nuc1 = seq1[n]
            seq1L.append(nuc1)
            seq2L.append(" ")
            bondL.append(" ")
            n = n + 1
        #when seq1 ends but not seq2
        elif m < seq2len:
            nuc2 = seq2[m]
            seq1L.append(" ")
            seq2L.append(nuc2)
            bondL.append(" ")
            m = m + 1
        else:
            break

In [38]:
seq1N = "".join(seq1L)
seq2N = "".join(seq2L)
bondLN = "".join(bondL)
print(seq1N+"\n"+bondLN+"\n"+seq2N)


ATT-TCT-G-CTAGCTGA
||| | | | || |||  
ATTATGTAGACT-GCTT 

In [39]:
print(seq1+"\n"+seq1N)


ATTTCTGCTAGCTGA
ATT-TCT-G-CTAGCTGA

In [40]:
print(seq2 + "\n"+ seq2N)


ATTATGTAGACTGCTT
ATTATGTAGACT-GCTT 

In [41]:
print("Stats: "+str(NoMatch)+" matches; "+str(NoSNP)+" SNPs; "+str(NoINDEL)+" indels")


Stats: 11 matches; 2 SNPs; 4 indels

In [81]:
m


Out[81]:
17

In [ ]: