In [1]:
# Location of the Supplemental Information PDF; the other file paths used in
# this notebook are derived from this path's stem via os.path.splitext.
# NOTE(review): absolute, user-specific path -- edit before running elsewhere.
supInfoFile = '/home/nick/notebook/SIPSim/dev/qSIP/PeerJ_qSIP_preprint/PeerJ_Supplemental_Information.pdf'
In [79]:
import os,sys
import re
%load_ext rpy2.ipython
In [81]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [57]:
# The text dump is written next to the PDF: same path, extension replaced by '.txt'.
pdfTextFile= os.path.splitext(supInfoFile)[0] + '.txt'
# Run pdf2txt.py on the PDF and filter its output through perl: every run of
# carriage returns becomes a single space, then every run of the whitespace
# characters in the bracket class becomes a single tab. Result goes to pdfTextFile.
!pdf2txt.py $supInfoFile | perl -pe 's/\r+/ /g; s/[ \t ]+/\t/g' > $pdfTextFile
# Preview: first 4 lines, a separator line, then the last 4 lines of the dump.
!head -n 4 $pdfTextFile
!printf '===============\n'
!tail -n 4 $pdfTextFile
In [48]:
# Print every line of the text dump that contains "Table" followed, after at
# least one other character, by "S2".
!egrep "Table.+S2" $pdfTextFile
In [77]:
tableS2File = pdfTextFile= os.path.splitext(supInfoFile)[0] + '_e.txt'
!head -n4 $tableS2File
In [88]:
%%R -i tableS2File -w 900
# Read the tab-delimited Table S2 file whose path is passed in from Python.
# Tube is converted to character (presumably so it is treated as a discrete
# grouping variable for the colour scale -- confirm).
tbl.s2 <- read.delim(tableS2File, sep='\t') %>%
    mutate(Tube = as.character(Tube))

# Points joined by lines, one colour per Tube, in a Glucose (rows) x Water
# (columns) grid of panels.
ggplot(tbl.s2, aes(x=density.g.p.mL, y=X16S.qPCR.copynum.p.ul, colour=Tube)) +
    geom_point() +
    geom_line() +
    theme_bw() +
    facet_grid(Glucose ~ Water) +
    theme(text = element_text(size=16))
In [91]:
%%R -w 600 -h 300
# Smoothed trend (geom_smooth defaults) of X16S.qPCR.copynum.p.ul against
# density.g.p.mL over all rows of tbl.s2, with no grouping.
ggplot(tbl.s2, aes(x=density.g.p.mL, y=X16S.qPCR.copynum.p.ul)) +
    geom_smooth() +
    theme_bw() +
    theme(text = element_text(size=16))
In [127]:
%%R -w 600 -h 300
# Bin the rows of tbl.s2 into 20 groups by rank of density (dplyr::ntile) and
# summarise each bin: density range/mean/width and the variance, standard
# deviation and mean of the 16S copy number within the bin.
tbl.s2.s = tbl.s2 %>%
    group_by(ntile(density.g.p.mL, 20)) %>%
    summarize(min_density = min(density.g.p.mL),
              mean_density = mean(density.g.p.mL),
              max_density = max(density.g.p.mL),
              density_width = max_density - min_density,
              var_copy = var(X16S.qPCR.copynum.p.ul),
              sd_copy = sd(X16S.qPCR.copynum.p.ul),
              mean_copy = mean(X16S.qPCR.copynum.p.ul))

# One bar per bin: centred on the bin's mean density, as wide as the bin's
# density range, as tall as the bin's copy-number standard deviation.
# The outline colour is a constant, so it is set on the layer. The original put
# color='red' inside aes(), which maps the literal string through the default
# colour scale (not red) and adds a meaningless legend.
ggplot(tbl.s2.s, aes(mean_density, sd_copy)) +
    geom_bar(stat='identity', aes(width=density_width), color='red') +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )
In [130]:
%%R -w 600 -h 500
# how does variance relate to mean copy number

# Top panel: per-bin variance of copy number against per-bin mean.
p1 <- ggplot(tbl.s2.s, aes(x=mean_copy, y=var_copy)) +
    geom_point() +
    theme_bw() +
    theme(text = element_text(size=16))

# Bottom panel: per-bin standard deviation against per-bin mean, with a
# linear-model fit overlaid.
p2 <- ggplot(tbl.s2.s, aes(x=mean_copy, y=sd_copy)) +
    geom_point() +
    geom_smooth(method='lm') +
    theme_bw() +
    theme(text = element_text(size=16))

# Stack the two panels in a single column.
grid.arrange(p1, p2, ncol=1)
In [129]:
%%R
# Linear regression of per-bin standard deviation on per-bin mean copy number,
# followed by the fit summary.
res <- lm(sd_copy ~ mean_copy, data = tbl.s2.s)
summary(res)
In [74]:
def _as_text(line):
    """Return ``line`` as text, decoding it first when it is a byte string.

    The notebook opens the dump in binary mode, so lines may arrive as bytes;
    undecodable bytes are replaced rather than raising.
    """
    if isinstance(line, bytes):
        return line.decode('utf-8', 'replace')
    return line


def subInfo_parser(iFH):
    """Find the Table S2 caption in a text dump and parse what follows it.

    Args:
        iFH: iterator over the lines of the pdf2txt dump (text or bytes).

    Returns:
        The dict produced by ``tableS2_parser`` for the lines after the first
        caption match, or an empty dict when no caption line is found.
    """
    # parsing tableS2 from qSIP paper
    for line in iFH:
        # NOTE(review): re.match with a leading '.+' needs at least one
        # character before "Table", so a line that *starts* with "Table S2"
        # does not match -- confirm this is intended.
        if re.match('.+Table.+S2', _as_text(line)):
            # tableS2_parser consumes the rest of the stream, so return its
            # result instead of resuming the (now exhausted) loop.
            return tableS2_parser(iFH)
    return {}


def tableS2_parser(iFH):
    """Collect the columns of Table S2 from the lines of ``iFH``.

    A line starting with one of the known header prefixes opens a column; the
    lines after it are read by ``parse_column``. Lines outside a column that do
    not start with a header prefix are skipped.

    Args:
        iFH: iterator over lines (text or bytes), positioned at or before the
            first column header.

    Returns:
        dict mapping each stripped header line to the list of values read under
        it. A header that occurs more than once has its values concatenated in
        order of appearance.
    """
    # Header prefixes restored from the commented-out tuple in the original
    # cell, where `is_header` was used without ever being defined.
    header = ('#SampleID', 'Tube', 'glucose', 'p-', '16S-')
    tableS2 = {}
    for line in iFH:
        line = _as_text(line)
        # str.startswith accepts a tuple: true if any prefix matches.
        if line.startswith(header):
            tableS2.setdefault(line.strip(), []).extend(parse_column(iFH))
    return tableS2


def parse_column(iFH):
    """Read one column's values: stripped lines up to the first blank line.

    NOTE(review): the original definition had no body. Ending a column at the
    first blank line (or end of input) is an assumption about the layout of the
    pdf2txt dump -- verify against the real file.

    Args:
        iFH: iterator over lines (text or bytes), positioned just after a
            column header.

    Returns:
        list of non-empty, stripped value strings; the terminating blank line
        is consumed.
    """
    values = []
    for line in iFH:
        value = _as_text(line).strip()
        if not value:
            break
        values.append(value)
    return values
# Feed the pdf2txt dump to the Table S2 parser; the file is opened in binary
# mode and closed automatically when the block exits.
with open(pdfTextFile, 'rb') as pdf_text_fh:
    tableS2_parser(pdf_text_fh)
In [ ]: