In [36]:
#2017-2-6
#DNA alignment
seq1 = "ATTTCTGCTAGCTGA"
seq2 = "ATTATGTAGACTGCTT"
n = 0
m = 0
seq1L = []
seq2L = []
bondL = []
seq1len = len(seq1)
seq2len = len(seq2)
print(seq1len, seq2len)
In [56]:
#Under condition of one nucleotide mutation, equal sequence length
while True:
if n < seq1len and n < seq2len:
nuc1 = seq1[n]
nuc2 = seq2[n]
#when identical
if nuc1 == nuc2:
seq1L.append(nuc1)
seq2L.append(nuc2)
bondL.append("|")
n = n + 1
#when SNP
elif nuc1 != nuc2 and seq1[n+1] == seq2[n+1]:
seq1L.append(nuc1)
seq2L.append(nuc2)
bondL.append(" ")
n = n + 1
else:
break
else:
break
In [37]:
#Under condition of one mutation or indel, length between sequecnes may not be equal
#Note: this verstion assumes no SNP or Indel occurs at the first nucleotide
#claim pair, SNP and indel stats variables
NoMatch = 0
NoSNP = 0
NoINDEL = 0
while True:
if n < seq1len and m < seq2len:
nuc1 = seq1[n]
nuc2 = seq2[m]
#when identical
if nuc1 == nuc2:
seq1L.append(nuc1)
seq2L.append(nuc2)
bondL.append("|")
n = n + 1
m = m + 1
NoMatch = NoMatch + 1
#when SNP, occurs at the end
elif nuc1 != nuc2 and n+1 == seq1len or m+1 == seq2len:
seq1L.append(nuc1)
seq2L.append(nuc2)
bondL.append(" ")
n = n + 1
m = m + 1
NoSNP = NoSNP + 1
#when SNP, occurs before end
elif nuc1 != nuc2 and seq1[n+1] == seq2[m+1]:
seq1L.append(nuc1)
seq2L.append(nuc2)
bondL.append(" ")
n = n + 1
m = m + 1
NoSNP = NoSNP + 1
#when insertion in seq2
elif nuc1 != nuc2 and nuc1 == seq2[m+1]:
seq1L.append("-")
seq2L.append(nuc2)
bondL.append(" ")
m = m + 1
NoINDEL = NoINDEL + 1
#when deletion in seq2
elif nuc1 != nuc2 and seq1[n+1] == nuc2:
seq1L.append(nuc1)
seq2L.append("-")
bondL.append(" ")
n = n + 1
NoINDEL = NoINDEL + 1
else:
break
else: #when at least one of the seq ends
#when seq2 ends but not seq1
if n < seq1len:
nuc1 = seq1[n]
seq1L.append(nuc1)
seq2L.append(" ")
bondL.append(" ")
n = n + 1
#when seq1 ends but not seq2
elif m < seq2len:
nuc2 = seq2[m]
seq1L.append(" ")
seq2L.append(nuc2)
bondL.append(" ")
m = m + 1
else:
break
In [38]:
seq1N = "".join(seq1L)
seq2N = "".join(seq2L)
bondLN = "".join(bondL)
print(seq1N+"\n"+bondLN+"\n"+seq2N)
In [39]:
print(seq1+"\n"+seq1N)
In [40]:
print(seq2 + "\n"+ seq2N)
In [41]:
print("Stats: "+str(NoMatch)+" matches; "+str(NoSNP)+" SNPs; "+str(NoINDEL)+" indels")
In [81]:
m
Out[81]:
In [ ]: