notebook.community

Edit and run



In [1]:

    
from Bio import motifs
import numpy as np
with open("../plot_playground/meme_out/meme.txt") as handle:
    m = motifs.parse(handle, "meme")



In [3]:

    
def calc_info_matrix(motif):
    pwm = motif.pwm
    bases = pwm.keys()
    info_matrix = [2+sum([pwm[b][l]*np.nan_to_num(np.log2(pwm[b][l])) for b in bases]) for l in range(0, len(motif))]
    return info_matrix

def approximate_error(motif):
    pwm = motif.pwm
    bases = pwm.keys()
    n = sum(motif.counts[bases[0]])
    info_matrix = calc_info_matrix(motif)
    approx_correction = (len(bases)-1)/(2 * np.log(2) * n)
    print approx_correction
    corrected_matrix = [2-approx_correction+sum([pwm[b][l]*np.nan_to_num(np.log2(pwm[b][l])) for b in bases]) for l in range(0, len(motif))]
    return corrected_matrix

def calc_relative_information(motif, approx_correction=True):
    pwm = motif.pwm
    bases = pwm.keys()
    if approx_correction:
        info_matrix = approximate_error(motif)
    else:
        info_matrix = calc_info_matrix(motif)
    relative_info = {base: [prob*info for prob,info in zip(pwm[base], info_matrix)]  for base in bases}
    return relative_info
    
def exact_correction(motif):
    pwm = motif.pwm
    bases = pwm.keys()
    na = sum(motif.counts['A'])
    n = na//10
    nc = 0
    ng = 0
    nt = 0
    done = False
    exact_error = 0
    while not done:
        exact_error += sum([-p*np.log2(p) for p in [na/n, nc/n, ng/n, nt/n]])
        if nt<=0:
            ## iterate inner loop            
            if ng > 0:
                ## g => t
                ng = ng - 1
                nt = nt + 1
            elif nc > 0:
                ## c -> g 
                nc = nc - 1;
                ng = ng + 1;
            else:
                ## a->c
                na = na - 1
                nc = nc + 1
        else:
            if ng > 0:
                ## g => t
                ng = ng - 1 
                nt = nt + 1
            elif nc > 0:
                ## c => g; all t -> g
                nc = nc - 1
                ng = nt + 1
                nt = 0
            elif na > 0:
                ## a => c; all g,t -> c
                nc = nt + 1
                na = na - 1
                nt = 0
            else:
                done = True
    print (exact_error)



In [ ]:

    
exact_correction(m[0])









    



/home/saket/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:38: RuntimeWarning: divide by zero encountered in log2
/home/saket/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:38: RuntimeWarning: invalid value encountered in double_scalars



In [ ]: