In [1]:
from Bio import motifs
import numpy as np
with open("../plot_playground/meme_out/meme.txt") as handle:
m = motifs.parse(handle, "meme")
In [3]:
def calc_info_matrix(motif):
pwm = motif.pwm
bases = pwm.keys()
info_matrix = [2+sum([pwm[b][l]*np.nan_to_num(np.log2(pwm[b][l])) for b in bases]) for l in range(0, len(motif))]
return info_matrix
def approximate_error(motif):
pwm = motif.pwm
bases = pwm.keys()
n = sum(motif.counts[bases[0]])
info_matrix = calc_info_matrix(motif)
approx_correction = (len(bases)-1)/(2 * np.log(2) * n)
print approx_correction
corrected_matrix = [2-approx_correction+sum([pwm[b][l]*np.nan_to_num(np.log2(pwm[b][l])) for b in bases]) for l in range(0, len(motif))]
return corrected_matrix
def calc_relative_information(motif, approx_correction=True):
pwm = motif.pwm
bases = pwm.keys()
if approx_correction:
info_matrix = approximate_error(motif)
else:
info_matrix = calc_info_matrix(motif)
relative_info = {base: [prob*info for prob,info in zip(pwm[base], info_matrix)] for base in bases}
return relative_info
def exact_correction(motif):
pwm = motif.pwm
bases = pwm.keys()
na = sum(motif.counts['A'])
n = na//10
nc = 0
ng = 0
nt = 0
done = False
exact_error = 0
while not done:
exact_error += sum([-p*np.log2(p) for p in [na/n, nc/n, ng/n, nt/n]])
if nt<=0:
## iterate inner loop
if ng > 0:
## g => t
ng = ng - 1
nt = nt + 1
elif nc > 0:
## c -> g
nc = nc - 1;
ng = ng + 1;
else:
## a->c
na = na - 1
nc = nc + 1
else:
if ng > 0:
## g => t
ng = ng - 1
nt = nt + 1
elif nc > 0:
## c => g; all t -> g
nc = nc - 1
ng = nt + 1
nt = 0
elif na > 0:
## a => c; all g,t -> c
nc = nt + 1
na = na - 1
nt = 0
else:
done = True
print (exact_error)
In [ ]:
exact_correction(m[0])
In [ ]: