In [1]:
import matplotlib
import Bio.SeqIO
import Bio.Restriction
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import numpy as np
In [2]:
def findNoOfFragments(fname):
lengths = []
lines = [line.strip() for i,line in enumerate(open(fname)) if (i%3==1)]
for line in lines:
lengths.append((len(line.split("\t"))) - 2)
count = 0.0
for length in lengths:
count += length
print(count/len(lines))
plt.hist(lengths, bins = 50)
In [3]:
def findInsertionDeletion(fname):
deletions = 0
insertions = 0
for line in open(fname):
deletions += int(line.strip().split(" ")[2])
insertions += int(line.strip().split(" ")[6])
print("Deletions: "+ str(deletions))
print("Insertions: "+ str(insertions))
In [5]:
def noOfFragments(line):
return((len(line.strip().split("\t"))) - 2)
def findTotalErrors(file1, file1_w_e):
errors = []
e_count = 0
lines = [line.strip() for i,line in enumerate(open(file1)) if (i%3==1)]
lines_w_e = [line.strip() for i,line in enumerate(open(file1_w_e)) if (i%3==1)]
for i in range(len(lines)):
errors.append(noOfFragments(lines_w_e[i]) - noOfFragments(lines[i]))
for e in errors:
e_count += int(e)
return(e_count)
In [8]:
def noOfFragments(line):
return((len(line.strip().split("\t"))) - 2)
def signmaS(length):
singma = float((0.2*0.2) + (length * 0.1 * (-0.1)) + (length*length*0.04*0.04))
return(singma)
def giveBellRange(sigma):
return(4.0 * math.sqrt(sigma))
def findErrors(read, c_read):
deletion_index = []
deletion_count = 0
c_frag = c_read.strip().split("\t")
del c_frag[0:2]
c_frag = [float(f) for f in c_frag]
frag = read.strip().split("\t")
del frag[0:2]
frag = [float(f) for f in frag]
j = 0
pad = 0
for i, c_v in enumerate(c_frag):
c_v = c_v + pad
d3 = giveBellRange(signmaS(c_v))
if(j>=len(frag)):
break;
#print(str(i)+" => "+str(frag[j]) + " "+ str(c_v - d3) + " "+ str(c_v + d3) + " d3: "+str(d3)+" c_v: "+str(c_v))
if(frag[j] <= (c_v + d3) and frag[j] >= (c_v - d3)):
j+=1
pad = 0
elif((c_v - d3) > frag[j]):
pad = c_frag[i]
d3 = giveBellRange(signmaS(c_frag[i]))
if(frag[j] <= (c_frag[i] + d3) and frag[j] >= (c_frag[i] - d3)):
j+=1
pad = 0
else:
pad = pad + c_frag[i]
deletion_index.append(i)
deletion_count+=1
return([deletion_count,deletion_index, (noOfFragments(c_read) - noOfFragments(read))])
def errors(fname, c_fname):
lines = [line for i,line in enumerate(open(fname)) if (i%3==1)]
c_lines = [line for i,line in enumerate(open(c_fname)) if (i%3==1)]
notFineList = []
FineList = []
#print(lines[9])
#print(c_lines[9])
#print(findErrors(lines[9], c_lines[9]))
#return
for i,line in enumerate(lines):
#print(lines[i])
#print(c_lines[i])
#print(findErrors(lines[i], c_lines[i]))
#print(i)
E = findErrors(lines[i], c_lines[i])
E = [i] + E # add read number
if(E[1] == E[3]):
FineList.append(E)
else:
notFineList.append(E)
return([FineList, notFineList])
In [11]:
def generateRandomList(listv, count):
randomList = []
randomListIndex = []
index = range(0, len(listv))
for c in range(0, count):
i = random.choice(index)
randomList.append(listv[i])
randomListIndex.append(i)
index.remove(i)
randomListIndex, randomList = zip(*sorted(zip(randomListIndex,randomList)))
return(randomListIndex, randomList)
def deleteRandomly(listv, count):
deleted = generateRandomList(listv, count)
deleted_index = list(deleted[0])
deleted_values = list(deleted[1])
for d in deleted_values:
listv.remove(d)
return(listv, deleted_index, deleted_values)
In [ ]: