In [1]:
import matplotlib
import Bio.SeqIO
import Bio.Restriction
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import numpy as np

In [2]:
def findNoOfFragments(fname):
    lengths = []
    lines = [line.strip() for i,line in enumerate(open(fname)) if (i%3==1)]       
    for line in lines:
         lengths.append((len(line.split("\t"))) - 2)
    count = 0.0
    for length in lengths:
        count += length
    print(count/len(lines))
    plt.hist(lengths, bins = 50)

In [3]:
def findInsertionDeletion(fname):
    deletions = 0
    insertions = 0
    for line in open(fname):
        deletions += int(line.strip().split(" ")[2])
        insertions += int(line.strip().split(" ")[6])
    print("Deletions: "+ str(deletions))
    print("Insertions: "+ str(insertions))

In [5]:
def noOfFragments(line):
    return((len(line.strip().split("\t"))) - 2)

def findTotalErrors(file1, file1_w_e):
    errors = []
    e_count = 0
    lines = [line.strip() for i,line in enumerate(open(file1)) if (i%3==1)]
    lines_w_e = [line.strip() for i,line in enumerate(open(file1_w_e)) if (i%3==1)]
    for i in range(len(lines)):
        errors.append(noOfFragments(lines_w_e[i]) - noOfFragments(lines[i]))
    for e in errors:
        e_count += int(e)
    return(e_count)

In [8]:
def noOfFragments(line):
    return((len(line.strip().split("\t"))) - 2)

def signmaS(length):
    singma = float((0.2*0.2) + (length * 0.1 * (-0.1)) + (length*length*0.04*0.04))
    return(singma)

def giveBellRange(sigma):
    return(4.0 * math.sqrt(sigma))

def findErrors(read, c_read):
    deletion_index = []
    deletion_count = 0
    
    c_frag = c_read.strip().split("\t")
    del c_frag[0:2]
    c_frag = [float(f) for f in c_frag]
    
    frag = read.strip().split("\t")
    del frag[0:2]
    frag = [float(f) for f in frag]
    j = 0
    pad = 0
    for i, c_v in enumerate(c_frag):
        c_v = c_v + pad
        d3 = giveBellRange(signmaS(c_v))
        if(j>=len(frag)):
            break;
        #print(str(i)+" => "+str(frag[j]) + " "+ str(c_v - d3) + " "+ str(c_v + d3) + " d3: "+str(d3)+" c_v: "+str(c_v))
        if(frag[j] <= (c_v + d3) and frag[j] >= (c_v - d3)):
            j+=1
            pad = 0
        elif((c_v - d3) > frag[j]):            
            pad = c_frag[i]
            d3 = giveBellRange(signmaS(c_frag[i]))
            if(frag[j] <= (c_frag[i] + d3) and frag[j] >= (c_frag[i] - d3)):
                j+=1        
                pad = 0
        else:
            pad = pad + c_frag[i]
            deletion_index.append(i)
            deletion_count+=1     
            
    return([deletion_count,deletion_index, (noOfFragments(c_read) - noOfFragments(read))])

def errors(fname, c_fname):    
    lines = [line for i,line in enumerate(open(fname)) if (i%3==1)]
    c_lines = [line for i,line in enumerate(open(c_fname)) if (i%3==1)]
    notFineList = []
    FineList = []
    
    #print(lines[9])
    #print(c_lines[9])
    #print(findErrors(lines[9], c_lines[9]))
    #return    
    for i,line in enumerate(lines):
        #print(lines[i])
        #print(c_lines[i])
        #print(findErrors(lines[i], c_lines[i]))
        #print(i)
        E = findErrors(lines[i], c_lines[i])
        E = [i] + E # add read number
        if(E[1] == E[3]):
            FineList.append(E)
        else:
            notFineList.append(E)            
    return([FineList, notFineList])

In [11]:
def generateRandomList(listv, count):
    randomList = []
    randomListIndex = []
    index = range(0, len(listv))
    for c in range(0, count):               
        i = random.choice(index)
        randomList.append(listv[i])
        randomListIndex.append(i)
        index.remove(i)
    randomListIndex, randomList = zip(*sorted(zip(randomListIndex,randomList)))
    return(randomListIndex, randomList)

def deleteRandomly(listv, count):
    deleted = generateRandomList(listv, count)
    deleted_index = list(deleted[0])
    deleted_values = list(deleted[1])    
    for d in deleted_values:
        listv.remove(d)
    return(listv, deleted_index, deleted_values)

In [ ]: