In [ ]:
%matplotlib inline
# Per-position base-call table to read.  The commented-out path is an
# alternative input file; swap it in to use it instead of tmp.pos_call.
#filename_call = '../../data/SingleCell/Undetermined_S0_L002_R1_001.pos_call'
filename_call = '../../data/SingleCell/tmp.pos_call'

# One entry per data row.  pos_list holds the first column; the five base
# lists hold *cumulative* fractions (A, A+T, A+T+G, A+T+G+C, A+T+G+C+N) so
# that the plotting code can overdraw them to get a stacked-bar look.
# NOTE(review): the column order pos/A/T/G/C/N is assumed from the variable
# names only -- confirm against the header line printed below.
pos_list = []
N_list = []
A_list = []
T_list = []
G_list = []
C_list = []

# `with` guarantees the file is closed even when a row fails to parse.
with open(filename_call, 'r') as f_call:
    # The first line is a tab-separated header; it is shown, not validated.
    headers = f_call.readline().strip().split("\t")
    print(headers)

    for line in f_call:
        tokens = line.strip().split("\t")
        if tokens == ['']:
            # Blank line (e.g. trailing newline at end of file): no data.
            continue

        # Total over every column after the position column.
        tmp_sum = sum([int(x) for x in tokens[1:]])
        # A row whose counts are all zero would divide by zero; dividing
        # the zero counts by 1 instead yields all-zero proportions.
        denom = tmp_sum if tmp_sum else 1

        pos_list.append(int(tokens[0]))
        tmp_A_freq = float(tokens[1]) / denom
        A_list.append(tmp_A_freq)
        tmp_T_freq = float(tokens[2]) / denom + tmp_A_freq
        T_list.append(tmp_T_freq)
        tmp_G_freq = float(tokens[3]) / denom + tmp_T_freq
        G_list.append(tmp_G_freq)
        tmp_C_freq = float(tokens[4]) / denom + tmp_G_freq
        C_list.append(tmp_C_freq)
        tmp_N_freq = float(tokens[5]) / denom + tmp_C_freq
        N_list.append(tmp_N_freq)
import matplotlib.pyplot as plt

# Single axes on a wide, short figure.
fig = plt.figure(figsize=(12, 4))
ax1 = fig.add_subplot(1, 1, 1)

# The *_list values are cumulative fractions, so the bars are drawn from the
# last-accumulated series (N) down to the first (A): each later call paints
# over the lower part of the previous bars, leaving one coloured band per
# base.
for cumulative, colour in (
    (N_list, 'black'),
    (C_list, 'red'),
    (G_list, 'green'),
    (T_list, 'orange'),
    (A_list, 'blue'),
):
    ax1.bar(pos_list, cumulative, facecolor=colour)

# One x tick per position; y ticks at quarter steps of the proportion.
ax1.set_xticks(pos_list)
ax1.set_yticks([0, 0.25, 0.50, 0.75, 1.0])
ax1.grid()

# The number of parsed rows is reported as the read length.
ax1.set_xlabel("Position (bp): read length is %d bp" % (len(A_list)))
ax1.set_ylabel("BaseCall Proportion")

plt.show()
In [ ]: