In [130]:
%pylab inline
In [131]:
ls
In [132]:
p16sx16s_k20_reads = numpy.loadtxt('podar.16s.x.16s.k20.reads.out')
p16sx16s_k32_reads = numpy.loadtxt('podar.16s.x.16s.k32.reads.out')
p16sx16s_k20_kmers = numpy.loadtxt('podar.16s.x.16s.k20.kmers.out')
p16sx16s_k32_kmers = numpy.loadtxt('podar.16s.x.16s.k32.kmers.out')
p16sxwgs_k20_reads = numpy.loadtxt('podar.16s.x.wgs.k20.reads.out')
p16sxwgs_k32_reads = numpy.loadtxt('podar.16s.x.wgs.k32.reads.out')
p16sxwgs_k20_kmers = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.out')
p16sxwgs_k32_kmers = numpy.loadtxt('podar.16s.x.wgs.k32.kmers.out')
p16sxref_kmers = numpy.loadtxt('podar.16s.x.ref.kmers.out')
p16sxsim_kmers = numpy.loadtxt('podar.16s.x.sim.kmers.out')
In [133]:
#plot(p16sx16s_k20_reads[:,0], p16sx16s_k20_reads[:,3,], label='k20/reads')
#plot(p16sx16s_k32_reads[:,0], p16sx16s_k32_reads[:,3,], label='k32/reads')
plot(p16sx16s_k20_kmers[:,0], p16sx16s_k20_kmers[:,3,], label='k20/kmers')
plot(p16sx16s_k32_kmers[:,0], p16sx16s_k32_kmers[:,3,], label='k32/kmers')
title('podar 16s x 16s')
axis(ymin=0, xmin=0)
xlabel('reads')
ylabel('fraction covered')
legend(loc='upper left')
Out[133]:
In [134]:
plot(p16sxref_kmers[:,0], p16sxref_kmers[:,3,], label='k20/kmers')
title('podar 16s x ref')
axis(ymin=0, xmin=0)
xlabel('reads')
ylabel('fraction covered')
legend(loc='upper left')
Out[134]:
In [135]:
#plot(p16sxwgs_k20_reads[:,0], p16sxwgs_k20_reads[:,3,], label='k20/reads')
#plot(p16sxwgs_k32_reads[:,0], p16sxwgs_k32_reads[:,3,], label='k32/reads')
plot(p16sxwgs_k20_kmers[:,0], p16sxwgs_k20_kmers[:,3,], label='k20/kmers')
#plot(p16sxwgs_k32_kmers[:,0], p16sxwgs_k32_kmers[:,3,], label='k32/kmers')
title('podar 16s x wgs')
axis(ymin=0, xmin=0, ymax=12)
xlabel('reads')
ylabel('fraction covered (%)')
legend(loc='upper left')
Out[135]:
In [136]:
plot(p16sxsim_kmers[:,0], p16sxsim_kmers[:,3], label='sim reads')
title('podar 16s x wgs')
axis(ymin=0, xmin=0)
xlabel('reads')
ylabel('fraction covered (%)')
legend(loc='upper left')
Out[136]:
In [137]:
s16sx16s_k20_reads = numpy.loadtxt('soil.16s.x.16s.k20.reads.out')
s16sx16s_k32_reads = numpy.loadtxt('soil.16s.x.16s.k32.reads.out')
s16sx16s_k20_kmers = numpy.loadtxt('soil.16s.x.16s.k20.kmers.out')
s16sx16s_k32_kmers = numpy.loadtxt('soil.16s.x.16s.k32.kmers.out')
s16sxwgs_k20_reads = numpy.loadtxt('soil.16s.x.wgs.k20.reads.out')
s16sxwgs_k32_reads = numpy.loadtxt('soil.16s.x.wgs.k32.reads.out')
s16sxwgs_k20_kmers = numpy.loadtxt('soil.16s.x.wgs.k20.kmers.out')
s16sxwgs_k32_kmers = numpy.loadtxt('soil.16s.x.wgs.k32.kmers.out')
In [138]:
#plot(s16sx16s_k20_reads[:,0], s16sx16s_k20_reads[:,3,], label='k20/reads')
#plot(s16sx16s_k32_reads[:,0], s16sx16s_k32_reads[:,3,], label='k32/reads')
plot(s16sx16s_k20_kmers[:,0], s16sx16s_k20_kmers[:,3,], label='k20/kmers')
plot(s16sx16s_k32_kmers[:,0], s16sx16s_k32_kmers[:,3,], label='k32/kmers')
title('soil 16s x 16s, reads')
axis(ymin=0, ymax=100, xmin=0)
xlabel('reads')
ylabel('fraction covered')
legend(loc='upper left')
Out[138]:
In [139]:
#plot(s16sxwgs_k20_reads[:,0], s16sxwgs_k20_reads[:,3,], label='k20/reads')
#plot(s16sxwgs_k32_reads[:,0], s16sxwgs_k32_reads[:,3,], label='k32/reads')
plot(s16sxwgs_k20_kmers[:,0], s16sxwgs_k20_kmers[:,3,], label='k20/kmers')
#plot(s16sxwgs_k32_kmers[:,0], s16sxwgs_k32_kmers[:,3,], label='k32/kmers')
title('soil 16s x wgs')
axis(ymin=0, xmin=0)
xlabel('reads')
ylabel('fraction covered')
legend(loc='upper left')
Out[139]:
Conclusions/questions --
In [171]:
lines = []
last_total = -1
base_total = 0
base_kept = 0
data = []
for line in open('podar.wgs.report'):
total, kept, f = line.split()
total, kept = int(total), int(kept)
if total < last_total:
base_total = last_total
base_kept = last_kept
data.append((total + base_total, kept + base_kept))
last_kept = kept
last_total = total
diginorm = numpy.array(data)
diginorm_C1 = numpy.loadtxt('podar.wgs.C1.report')
In [172]:
plot(diginorm[:,0], diginorm[:,1])
Out[172]:
In [173]:
plot(diginorm_C1[:,0], diginorm_C1[:,1], label='C1')
plot(diginorm[:,0] / 10, diginorm[:,1] / 13., label='C10')
axis(xmax=1e7)
legend(loc='lower right')
Out[173]:
In [142]:
podar_16s_kmer = numpy.loadtxt('podar_16s.hist')
In [143]:
plot(podar_16s_kmer[:,0], podar_16s_kmer[:,1])
axis(xmax=20, ymax=1000)
Out[143]:
In [144]:
m10 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m10.out')
m2 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m2.out')
m20 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m20.out')
m40 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m40.out')
m5 = numpy.loadtxt('podar.16s.x.wgs.k20.kmers.m5.out')
m2_k32 = numpy.loadtxt('podar.16s.x.wgs.k32.kmers.out')
In [145]:
plot(m40[:,0], m40[:,3], label='m40')
plot(m20[:,0], m20[:,3], label='m20')
plot(m2_k32[:,0], m2_k32[:,3], label='m20/k=32')
plot(m10[:,0], m10[:,3], label='m10')
plot(m5[:,0], m5[:,3], label='m5')
plot(m2[:,0], m2[:,3], label='m2')
legend()
Out[145]:
In [273]:
plot(m20[:,0] / 1e6, m20[:,3], 'r-', label='16s (v1-3)', linewidth=2)
ylabel('% information in 16s')
xlabel('number of shotgun reads examined (m)')
axis(xmax=5)
savefig('/tmp/fig8.pdf')
In [146]:
plot(m20[:,0], m20[:,3], label='m20')
plot(m2_k32[:,0], m2_k32[:,3], label='m20/k=32')
legend(loc='lower right')
title('choice of k doesn\'t matter')
Out[146]:
In [147]:
plot(diginorm[:,0], diginorm[:,1])
axis(xmax=2e7)
Out[147]:
In [148]:
plot(m2[:,0], m2[:,3], label='m2')
plot(m5[:,0], m5[:,3], label='m5')
plot(m10[:,0], m10[:,3], label='m10')
plot(m20[:,0], m20[:,3], label='m20')
plot(m40[:,0], m40[:,3], label='m40')
axis(xmax=0.5e7)
legend()
Out[148]:
In [149]:
plot(diginorm[:,0], diginorm[:,1])
axis(xmax=0.5e7)
Out[149]:
In [150]:
sim_m10 = numpy.loadtxt('podar.16s.x.sim.k20.kmers.m10.out')
sim_m2 = numpy.loadtxt('podar.16s.x.sim.k20.kmers.m2.out')
sim_m20 = numpy.loadtxt('podar.16s.x.sim.k20.kmers.m20.out')
sim_m40 = numpy.loadtxt('podar.16s.x.sim.k20.kmers.m40.out')
sim_m5 = numpy.loadtxt('podar.16s.x.sim.k20.kmers.m5.out')
In [151]:
plot(sim_m2[:,0], sim_m2[:,3], label='m2')
plot(sim_m5[:,0], sim_m5[:,3], label='m5')
plot(sim_m10[:,0], sim_m10[:,3], label='m10')
plot(sim_m20[:,0], sim_m20[:,3], label='m20')
plot(sim_m40[:,0], sim_m40[:,3], label='m40')
legend()
Out[151]:
In [152]:
#xref
#xsim
#on soil
In [174]:
soil_m2 = numpy.loadtxt('soil.16s.x.wgs.k20.kmers.m2.out')
soil_m20 = numpy.loadtxt('soil.16s.x.wgs.k20.kmers.m20.out')
plot(soil_m2[:,0], soil_m2[:,3])
plot(soil_m20[:,0], soil_m20[:,3])
Out[174]:
In [175]:
podar_v13 = numpy.loadtxt('podar.v13.x.wgs.k20.kmers.m20.out')
podar_v4arch = numpy.loadtxt('podar.v4arch.x.wgs.k20.kmers.m20.out')
podar_v69 = numpy.loadtxt('podar.v69.x.wgs.k20.kmers.m20.out')
In [182]:
plot(podar_v13[:,0], podar_v13[:,3], label='v13')
plot(m20[:,0], m20[:,3], label='v35')
#plot(podar_v4arch[:,0], podar_v4arch[:,3], label='v4 (arch)')
plot(podar_v69[:,0], podar_v69[:,3], label='v69')
legend(loc='lower right')
Out[182]:
In [186]:
beetle_m20 = numpy.loadtxt('beetle.kmers.m20.out')
plot(beetle_m20[:,0], beetle_m20[:,3])
title('beetle gut: 16s saturation curve')
ylabel('fraction of informative 16s k-mers seen in shotgun data')
xlabel('number of shotgun reads examined')
Out[186]:
In [206]:
lanier_16s_x_wgs = numpy.loadtxt('lanier.16s1.x.wgs.kmers.out')
plot(lanier_16s_x_wgs[:,0], lanier_16s_x_wgs[:,3], label='Lake Lanier')
plot(beetle_m20[:,0] * 4, beetle_m20[:,3], label='Beetle')
plot(soil_m20[:,0], soil_m20[:,3], label='soil')
legend(loc='lower right')
Out[206]:
In [192]:
lines = []
last_total = -1
base_total = 0
base_kept = 0
data = []
for line in open('lanier.wgs.C1.report'):
total, kept, f = line.split()
total, kept = int(total), int(kept)
if total < last_total:
print total
base_total = last_total + base_total
base_kept = last_kept + base_kept
data.append((total + base_total, kept + base_kept))
last_kept = kept
last_total = total
lanier_C1 = numpy.array(data)
In [194]:
plot(lanier_C1[:,0], lanier_C1[:,1])
Out[194]:
In [244]:
podar_c0 = numpy.loadtxt('podar.c0.x.wgs.k20.kmers.m20.out')
podar_c01 = numpy.loadtxt('podar.c01.x.wgs.k20.kmers.m20.out')
podar_c02 = numpy.loadtxt('podar.c02.x.wgs.k20.kmers.m20.out')
podar_c03 = numpy.loadtxt('podar.c03.x.wgs.k20.kmers.m20.out')
podar_c04 = numpy.loadtxt('podar.c04.x.wgs.k20.kmers.m20.out')
podar_c05 = numpy.loadtxt('podar.c05.x.wgs.k20.kmers.m20.out')
podar_c20 = numpy.loadtxt('podar.c20.x.wgs.k20.kmers.m20.out')
podar_c40 = numpy.loadtxt('podar.c40.x.wgs.k20.kmers.m20.out')
#plot(podar_c0[:,0], podar_c0[:,3], label='otu 0')
#plot(podar_c01[:,0] / 1e6, podar_c01[:,3], 'g-', label='otu 1', linewidth=2)
plot(podar_c02[:,0] / 1e6, podar_c02[:,3], 'b-', label='otu 2', linewidth=2)
plot(podar_c03[:,0] / 1e6, podar_c03[:,3], 'c-', label='otu 3', linewidth=2)
plot(podar_c04[:,0] / 1e6, podar_c04[:,3], 'y-', label='otu 4', linewidth=2)
plot(podar_c05[:,0] / 1e6, podar_c05[:,3], 'r-', label='otu 5', linewidth=2)
#plot(podar_c20[:,0], podar_c20[:,3], label='otu 20')
#plot(podar_c40[:,0], podar_c40[:,3], label='otu 40')
legend(loc='lower right')
axis(xmax=2)
ylabel('% 16s information seen')
xlabel('number of shotgun reads examined (m)')
savefig('/tmp/fig6.pdf')
In [235]:
# otu5 is acidobacterium; one species, Acidobacterium capsulatum, with one rRNA; 4.6% of BA community, 4.7% of Illumina reads;
# otu2 is chlorobium; five species, total of 10 rRNA; 9.1% of Illumina.
corr_factor = 5
plot(podar_c02[:,0] / 1e6, podar_c02[:,3], 'b-', label='Chlorobium (uncorrected)', linewidth=2)
plot(podar_c02[:,0] * 5 / 1e6, podar_c02[:,3], 'b--', label='Chlorobium (abund adjusted)', linewidth=2)
plot(podar_c05[:,0] / 1e6, podar_c05[:,3], 'r-', label='Acidobacterium', linewidth=2)
legend(loc='lower right')
axis(xmax=2)
ylabel('% of 16s information')
xlabel('number of shotgun reads examined (m)')
savefig('/tmp/fig5.pdf')
In [224]:
wa_16s = numpy.loadtxt('wrighton.16s.x.wgs-a.kmers.out')
wb_16s = numpy.loadtxt('wrighton.16s.x.wgs-b.kmers.out')
wc_16s = numpy.loadtxt('wrighton.16s.x.wgs-c.kmers.out')
In [229]:
plot(wa_16s[:,0], wa_16s[:,3], label='A')
plot(wb_16s[:,0], wa_16s[:,3], label='B')
plot(wc_16s[:,0], wa_16s[:,3], label='C')
axis(xmax=1200000)
Out[229]:
In [267]:
plot(wa_16s[:,1] / 1e6, wa_16s[:,3], label='groundwater (Wrighton et al.)', linewidth=2)
plot(beetle_m20[:,1] / 1e6, beetle_m20[:,3], label='beetle gut (Scully et al.)', linewidth=2)
plot(lanier_16s_x_wgs[:,1] / 1e6, lanier_16s_x_wgs[:,3], label='Lake Lanier (Poretsky et al.)', linewidth=2)
plot(soil_m20[:,1] / 1e6, soil_m20[:,3], label='corn rhizosphere (unpub)', linewidth=2)
legend(loc='upper right')
xlabel('Total bp seen (Mbp)')
ylabel('% 16s information seen')
axis(xmax=2000)
savefig('/tmp/fig7.pdf')
In [ ]: