# CG_ErrorCorrect

``````

In :

def neighbors1mm(kmer, alpha):
    ''' Generate all neighbors at Hamming distance 1 from kmer.

    kmer is the string to mutate.  alpha is the alphabet, an iterable
    of single characters (e.g. 'ACGT').

    Returns a list of strings, each obtained by substituting one
    position of kmer with a different character from alpha.  Positions
    are visited from last to first; within a position, replacements
    follow the order of alpha.  An empty kmer yields an empty list. '''
    neighbors = []
    # Walk positions right-to-left so that substitutions at the last
    # position come first in the result.
    for j in reversed(range(len(kmer))):
        oldc = kmer[j]
        prefix, suffix = kmer[:j], kmer[j + 1:]
        # Skip the character already present: that would reproduce
        # kmer itself rather than a neighbor.
        neighbors.extend(prefix + c + suffix for c in alpha if c != oldc)
    return neighbors

``````
``````

In :

# List every string one substitution away from 'CAT' over the alphabet 'ACGT'
neighbors1mm('CAT', 'ACGT')

``````
``````

Out:

['CAA', 'CAC', 'CAG', 'CCT', 'CGT', 'CTT', 'AAT', 'GAT', 'TAT']

``````
``````

In :

def kmerHist(reads, k):
    ''' Return k-mer histogram for a collection of reads.

    reads is an iterable of strings.  k is the k-mer length and is
    expected to be a positive integer.

    Returns a dict mapping each length-k substring that occurs in any
    read to the total number of times it occurs across all reads.
    Overlapping occurrences are counted.  Reads shorter than k
    contribute nothing. '''
    kmerhist = {}
    for read in reads:
        # A read of length n contains n - k + 1 k-mers (none if n < k)
        for i in range(len(read) - k + 1):
            kmer = read[i:i + k]
            kmerhist[kmer] = kmerhist.get(kmer, 0) + 1
    return kmerhist

``````
``````

In :

# Count the 3-mers of a single read made of 'CAT' repeated 10 times;
# the bare name on the last line makes the notebook display the result.
khist = kmerHist(['CAT' * 10], 3)
khist

``````
``````

Out:

{'ATC': 9, 'CAT': 10, 'TCA': 9}

``````
``````

In :

def correct1mm(read, k, kmerhist, alpha, thresh):
    ''' Return an error-corrected version of read.  k = k-mer length.
    kmerhist is kmer count map.  alpha is alphabet.  thresh is
    count threshold above which k-mer is considered correct.

    Each k-mer of read is examined left to right.  A k-mer whose count
    is <= thresh is replaced by the first of its 1-mismatch neighbors
    (in the order produced by neighbors1mm) whose count is > thresh;
    if no such neighbor exists the k-mer is left alone.  Later k-mers
    are taken from the already-corrected read.  A read shorter than k
    is returned unchanged. '''
    # Iterate over k-mers in read.  Replacements keep the read length
    # the same, so the range computed up front stays valid.
    for i in range(len(read) - k + 1):
        kmer = read[i:i + k]
        # If k-mer is infrequent...
        if kmerhist.get(kmer, 0) <= thresh:
            # Look for a frequent neighbor
            for newkmer in neighbors1mm(kmer, alpha):
                if kmerhist.get(newkmer, 0) > thresh:
                    # Found a frequent neighbor; replace old kmer
                    # with neighbor
                    read = read[:i] + newkmer + read[i + k:]
                    break
    # Return possibly-corrected read
    return read

``````
``````

In :

# 'CAT' has count 10 in khist, above thresh=2, so the read needs no correction
correct1mm('CAT', 3, khist, 'ACGT', 2)

``````
``````

Out:

'CAT'

``````
``````

In :

# 'CTT' is absent from khist (count 0 <= thresh), so a frequent
# 1-mismatch neighbor is searched for; the recorded output is 'CAT'
correct1mm('CTT', 3, khist, 'ACGT', 2)

``````
``````

Out:

'CAT'

``````