In [2]:
# paths
import os
# --- project-local locations -------------------------------------------
# Root directory for this run (the trailing slash is part of the value).
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/'
# Sub-directory of workDir whose output tables are globbed further down.
buildDir = os.path.join(workDir, 'atomIncorp_taxaIncorp')

# --- inputs defined elsewhere --------------------------------------------
# NOTE(review): none of the three paths below is referenced in the visible
# cells; presumably they are used by cells outside this view -- confirm.
genome_index = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/genome_index.txt'
fragFile = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/ampFrags_kde.pkl'
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
In [3]:
import glob
import itertools
import nestly
In [4]:
%load_ext rpy2.ipython
%load_ext pushnote
In [5]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [6]:
# Collect every '*-cMtx_byClass.txt' table found directly inside buildDir.
# glob.glob() returns matches in arbitrary (filesystem-dependent) order, so
# the list is sorted to keep the row order of the combined table built from
# it reproducible between runs and machines.
F = os.path.join(buildDir, '*-cMtx_byClass.txt')
files = sorted(glob.glob(F))
files
Out[6]:
In [7]:
%%R -i files
# Read every table listed in `files` and stack them into one data frame.
#
# Each table is tagged with its source file name *before* binding, instead of
# recovering the name afterwards from the row names that rbind() generates
# (that approach needs a regex and mis-parses any file name that itself ends
# in '.<digits>').
if (length(files) == 0) {
    stop('No *-cMtx_byClass.txt files were passed in; nothing to load')
}
df_byClass = lapply(files, function(f) {
    tbl = read.delim(f, sep='\t')
    # rep(..., nrow) keeps the assignment valid for a table with zero rows
    tbl$file = rep(basename(f), nrow(tbl))
    tbl
})
df_byClass = do.call(rbind, df_byClass)
# method = the part of the file name in front of the first '-'
df_byClass$method = gsub('-.+', '', df_byClass$file)
# plain 1..n row names
rownames(df_byClass) = NULL
df_byClass %>% head(n=3)
In [12]:
%%R
# Smallest and largest Specificity value recorded for each method (NAs ignored).
filter(df_byClass, variables == 'Specificity') %>%
    group_by(method, variables) %>%
    summarise(
        min_val = min(values, na.rm = TRUE),
        max_val = max(values, na.rm = TRUE)
    )
In [10]:
%%R
# Smallest and largest Sensitivity value recorded for each method (NAs ignored).
filter(df_byClass, variables == 'Sensitivity') %>%
    group_by(method, variables) %>%
    summarise(
        min_val = min(values, na.rm = TRUE),
        max_val = max(values, na.rm = TRUE)
    )
In [9]:
%%R
# Mean and standard deviation of Specificity for each method (NAs ignored).
filter(df_byClass, variables == 'Specificity') %>%
    group_by(method, variables) %>%
    summarise(
        mean_val = mean(values, na.rm = TRUE),
        sd_val = sd(values, na.rm = TRUE)
    )
In [10]:
%%R
# Mean and standard deviation of Sensitivity for each method (NAs ignored).
filter(df_byClass, variables == 'Sensitivity') %>%
    group_by(method, variables) %>%
    summarise(
        mean_val = mean(values, na.rm = TRUE),
        sd_val = sd(values, na.rm = TRUE)
    )
In [15]:
%%R
# Mean and SD of qSIP Specificity for every percIncorp x percTaxa combination,
# listed from the lowest mean to the highest.
df_byClass %>%
    filter(variables == 'Specificity',
           method == 'qSIP') %>%
    group_by(method, variables, percIncorp, percTaxa) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    # summarize() leaves the result grouped; drop the grouping so that
    # arrange() orders all rows globally regardless of the dplyr version
    # (some older releases sorted within groups instead).
    ungroup() %>%
    arrange(mean_val) %>%
    as.data.frame
In [16]:
%%R
# Mean and SD of qSIP Sensitivity for every percIncorp x percTaxa combination,
# listed from the lowest mean to the highest.
df_byClass %>%
    filter(variables == 'Sensitivity',
           method == 'qSIP') %>%
    group_by(method, variables, percIncorp, percTaxa) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    # summarize() leaves the result grouped; drop the grouping so that
    # arrange() orders all rows globally regardless of the dplyr version
    # (some older releases sorted within groups instead).
    ungroup() %>%
    arrange(mean_val) %>%
    as.data.frame
In [33]:
%%R
# Average gap in Specificity between DESeq2 and qSIP (DESeq2 minus qSIP),
# taken over all percIncorp x percTaxa combinations.
df_byClass %>%
    # keep only the rows that feed the comparison before aggregating
    filter(variables %in% c('Specificity'),
           method %in% c('qSIP', 'DESeq2')) %>%
    group_by(method, variables, percIncorp, percTaxa) %>%
    summarize(mean_val = mean(values, na.rm=TRUE)) %>%
    ungroup() %>%
    # one column per method so the two can be subtracted row by row
    spread(method, mean_val) %>%
    mutate(diff = DESeq2 - qSIP) %>%
    # na.rm=TRUE, as in the other aggregates of this notebook: a combination
    # that is missing (or all-NA) for either method yields an NA difference,
    # which would otherwise turn the overall mean into NA.
    summarize(mean_diff = mean(diff, na.rm=TRUE)) %>%
    as.data.frame
In [ ]: