Calculate the percentage of incorrectly attributed reads in the following file for sample 1 and sample 2.
The percentage of incorrectly attributed reads is then plotted with matplotlib according to the number of reads found.
In [1]:
# Preview the first lines of the input file to check its layout.
with open('./jeter.tsv', 'r') as tsv_file:
    # zip() stops as soon as either side runs out, so a file with fewer
    # than 10 lines is shown in full instead of raising StopIteration.
    # range() comes first so no extra line is consumed from the file.
    for _, line in zip(range(10), tsv_file):
        # Each line still carries its own newline; strip it so print()
        # does not emit a blank line after every row.
        print(line.rstrip('\n'))
Import packages
In [3]:
%pylab inline
import csv
import matplotlib.pyplot as plt
Create lists to store results for different thresholds from 0 to 25%
In [4]:
# One counter slot per integer threshold 0..25 (26 slots), all starting at 0.
# Each name needs its own list: the counters are incremented independently.
res_s1_sup_s2 = [0] * 26
res_s2_sup_s1 = [0] * 26
res_s1_sup_other = [0] * 26
res_s2_sup_other = [0] * 26
Parse the file and populate the lists
In [5]:
def percent_of_total(part, total):
    """Return ``part`` expressed as a percentage of ``total``.

    Returns None when ``total`` is zero, because the percentage is
    undefined in that case (the plain division would raise
    ZeroDivisionError).
    """
    if total == 0:
        return None
    return part * 100 / total


def count_reached_thresholds(counts, percentage):
    """Add one to ``counts[t]`` for every integer threshold ``t <= percentage``.

    ``counts`` is modified in place; its length defines the thresholds
    (index 0 is the 0 % threshold, index 1 the 1 % threshold, ...).
    A ``percentage`` of None (undefined) is ignored.
    """
    if percentage is None:
        return
    for threshold in range(len(counts)):
        if percentage >= threshold:
            counts[threshold] += 1


# NOTE: the res_* lists are incremented in place, so re-running this cell
# without re-creating them first counts every row twice.
with open('./jeter.tsv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter='\t')
    # Skip the header line; the default avoids StopIteration on an empty file.
    next(reader, None)
    for R in reader:
        # csv.reader yields an empty list for a blank line (e.g. a trailing one).
        if not R:
            continue

        # Columns 5-7 are summed as the read total for sample 1 and columns
        # 8-10 as the total for sample 2.
        # NOTE(review): columns 5 and 8 are presumably the reads supporting
        # the sample's own genotype - confirm against the file header.
        s1_total = float(R[5]) + float(R[6]) + float(R[7])
        s2_total = float(R[8]) + float(R[9]) + float(R[10])

        # Percentage of each sample's reads that support the other sample
        # or neither sample ("other"). None when the sample has no reads.
        s1_sup_s2 = percent_of_total(float(R[6]), s1_total)
        s2_sup_s1 = percent_of_total(float(R[9]), s2_total)
        s1_sup_other = percent_of_total(float(R[7]), s1_total)
        s2_sup_other = percent_of_total(float(R[10]), s2_total)

        count_reached_thresholds(res_s1_sup_s2, s1_sup_s2)
        count_reached_thresholds(res_s2_sup_s1, s2_sup_s1)
        count_reached_thresholds(res_s1_sup_other, s1_sup_other)
        count_reached_thresholds(res_s2_sup_other, s2_sup_other)
In [6]:
# Show the four per-threshold counters, one list per line.
for counts in (res_s1_sup_s2, res_s2_sup_s1, res_s1_sup_other, res_s2_sup_other):
    print(counts)
In [31]:
# For each integer threshold (x axis), plot how many rows of the input file
# have at least that percentage of incorrectly attributed reads (y axis, log
# scale). One curve per counter list.
fig, ax = plt.subplots(figsize=(20, 10))
ax.set_title("Number of records whose percentage of reads with an incorrect genotype "
             "(matching another sample from the same lane, or a random error) "
             "reaches each threshold")
# With a single sequence, semilogy plots the values against their index;
# here the index is the threshold in percent and the value is a row count.
ax.set_xlabel("Minimum percentage of reads with an incorrect genotype (threshold, %)")
ax.set_ylabel("Number of records at or above the threshold")
line1 = ax.semilogy(res_s1_sup_s2, 'b', label='reads_sample1_supporting_sample2')
line2 = ax.semilogy(res_s2_sup_s1, 'g', label='reads_sample2_supporting_sample1')
line3 = ax.semilogy(res_s1_sup_other, 'r', label='reads_sample1_supporting_others')
line4 = ax.semilogy(res_s2_sup_other, 'm', label='reads_sample2_supporting_others')
# NOTE(review): 77869 is a hard-coded upper bound, presumably the number of
# data rows in jeter.tsv - confirm, or derive it from res_s1_sup_s2[0].
ax.set_ylim(1, 77869)
# Trailing semicolon keeps the notebook from echoing the Legend repr.
ax.legend(loc='best');
Out[31]: