In [59]:
%pylab inline
In [60]:
lines = []
last_total = -1
base_total = 0
base_kept = 0
data = []
for line in open('podar.wgs.report'):
total, kept, f = line.split()
total, kept = int(total), int(kept)
if total < last_total:
base_total = last_total
base_kept = last_kept
data.append((total + base_total, kept + base_kept))
last_kept = kept
last_total = total
diginorm = numpy.array(data)
diginorm_C1 = numpy.loadtxt('podar.wgs.C1.report')
In [69]:
m10 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m10.out')
m2 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m2.out')
m20 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m20.out')
m40 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m40.out')
m5 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m5.out')
podar_v13 = numpy.loadtxt('podar.v13.x.wgs.k20.kmers.m20.out')
podar_v4arch = numpy.loadtxt('podar.v4arch.x.wgs.k20.kmers.m20.out')
podar_v69 = numpy.loadtxt('podar.v69.x.wgs.k20.kmers.m20.out')
In [62]:
soil_m2 = numpy.loadtxt('soil.16s.x.wgs.k20.kmers.m2.out')
soil_m20 = numpy.loadtxt('soil.16s.x.wgs.k20.kmers.m20.out')
In [130]:
figsize(7, 4)
plot(diginorm[:,0] / 1e6, diginorm[:,1] / 1e6, 'b-', label='shotgun (cov=10)', linewidth=2)
plot(diginorm_C1[:,0] / 1e6, diginorm_C1[:,1]*13 / 1e6, 'r-', label='shotgun (cov=1)', linewidth=2)
plot(diginorm[:,0] / 10 / 1e6, diginorm[:,1] / 1e6, 'b--', label='shotgun (cov=10), adjusted', linewidth=3)
#title("shotgun sequencing collector's curve")
xlabel("total reads (m)")
ylabel("informative reads (m)")
legend(loc='lower right')
axis(xmax=100)
savefig('/tmp/fig1.pdf')
In [71]:
plot(m40[:,0], m40[:,3], label='min=40')
plot(m20[:,0], m20[:,3], label='min=20')
plot(m10[:,0], m10[:,3], label='min=10')
plot(m5[:,0], m5[:,3], label='min=5')
plot(m2[:,0], m2[:,3], label='min=2')
legend()
ylabel('fraction of informative 16s k-mers seen in shotgun data')
xlabel('number of shotgun reads examined')
title('podar: 16s x wgs, saturation curves')
Out[71]:
In [136]:
plot(diginorm_C1[:,0] / 1e6, diginorm_C1[:,1] / 3e5 * 11, 'r-', label='shotgun (cov=1)')
plot(diginorm[:,0] / 1e6, diginorm[:,1] / 3e5, 'b-', label='shotgun (cov=10)', linewidth=2)
#plot(m40[:,0], m40[:,3], label='16s/min=40')
plot(m20[:,0] / 1e6, m20[:,3], 'g-', label='16s (v1-3, 454)', linewidth=2)
#plot(m10[:,0], m10[:,3], label='16s/min=10')
#title('collectors\' curves: shotgun sequencing & 16s')
xlabel('number of reads (m)')
ylabel('% 16s information seen')
axis(xmax=20, ymax=100)
legend(loc='lower right')
xlabel('number of shotgun reads examined (m)')
savefig('/tmp/fig2.pdf')
In [77]:
plot(soil_m2[:,0], soil_m2[:,3], label='16s/min=2')
plot(soil_m20[:,0], soil_m20[:,3], label='16s/min=20')
title('soil: C2 data set (real community)')
ylabel('fraction of 16s k-mers seen in data / reads kept')
xlabel('number of shotgun reads examined')
Out[77]:
In [135]:
plot(podar_v13[:,0] / 1e6, podar_v13[:,3], label='16s (v1-3, 454)', linewidth=2)
plot(m20[:,0] / 1e6, m20[:,3], label='16s (v3-5, 454)', linewidth=2)
plot(podar_v69[:,0] / 1e6, podar_v69[:,3], label='16s (v6-9, 454)', linewidth=2)
legend(loc='lower right')
axis(xmax=8)
#title('16s saturation curves for different regions')
ylabel('% 16s information seen')
xlabel('number of shotgun reads examined (m)')
savefig('/tmp/fig3.pdf')
Points:
In [138]:
figsize(4, 4)
contam = numpy.loadtxt('podar.16s.x.contam.k20.kmers.m20.out')
plot(m20[:,0] / 1e6, m20[:,3], 'b-', label='before addition', linewidth=2)
plot(contam[:,0] / 1e6, contam[:,3], 'g-', label='after addition', linewidth=2)
legend(loc='lower right')
#title('adding simulated phage')
ylabel('% 16s information seen')
xlabel('number of shotgun reads examined (m)')
savefig('/tmp/fig4.pdf')
In [127]:
figsize(7, 4)
In [ ]: