In [1]:
import os
# paths
# Root directory for this run; the run-specific paths below are derived from it.
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/'
buildDir = os.path.join(workDir, 'microBetaDiv')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
# Derived from workDir (yields the same string as the former hard-coded literal)
# so the two cannot drift apart if workDir is edited.
fragFile = os.path.join(workDir, 'validation', 'ampFrags_kde.pkl')
genome_index = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/genome_index.txt'
In [2]:
import glob
import itertools
import nestly
In [3]:
# rpy2's IPython extension provides the %%R cell magic used by the R cells below.
%load_ext rpy2.ipython
# NOTE(review): nothing in the visible cells references pushnote -- presumably a
# notification magic; confirm it is still needed.
%load_ext pushnote
In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [5]:
# Collect the per-class result tables ('<method>-...-cMtx_byClass.txt') written to buildDir.
F = os.path.join(buildDir, '*-cMtx_byClass.txt')
# glob.glob() returns matches in arbitrary order; sort so the table assembled
# from these files (and its head() preview) has a reproducible row order.
files = sorted(glob.glob(F))
files
Out[5]:
In [6]:
%%R -i files
# Read every per-class table listed in `files` (passed in from Python via -i),
# stack them into a single data frame, and label each row with its source file
# and the method encoded in that file's name.
if (length(files) == 0){
  stop('No *-cMtx_byClass.txt files were passed in; check the glob in the previous cell')
}
df_byClass = lapply(files, function(f){
  tbl = read.delim(f, sep='\t')
  # Tag rows with the file's base name directly, rather than recovering it
  # afterwards from the row names that rbind() generates.
  tbl$file = rep(basename(f), nrow(tbl))
  tbl
})
df_byClass = do.call(rbind, df_byClass)
# Method label = everything before the first '-' in the file name.
df_byClass$method = gsub('-.+', '', df_byClass$file)
# Reset to default sequential row names (unlike 1:nrow(), this is also
# correct for a zero-row table).
rownames(df_byClass) = NULL
# Keep only rows with perm_perc <= 20.
df_byClass = dplyr::filter(df_byClass, perm_perc <= 20)
df_byClass %>% head(n=3)
In [7]:
%%R
# Smallest and largest Specificity value observed for each method.
df_byClass %>%
    dplyr::filter(variables == 'Specificity') %>%
    dplyr::group_by(method, variables) %>%
    dplyr::summarise(
        min_val = min(values, na.rm=TRUE),
        max_val = max(values, na.rm=TRUE)
    )
In [8]:
%%R
# Smallest and largest Sensitivity value observed for each method.
df_byClass %>%
    dplyr::filter(variables == 'Sensitivity') %>%
    dplyr::group_by(method, variables) %>%
    dplyr::summarise(
        min_val = min(values, na.rm=TRUE),
        max_val = max(values, na.rm=TRUE)
    )
In [9]:
%%R
# Mean and standard deviation of Specificity for each method (NAs ignored).
df_byClass %>%
    dplyr::filter(variables == 'Specificity') %>%
    dplyr::group_by(method, variables) %>%
    dplyr::summarise(
        mean_val = mean(values, na.rm=TRUE),
        sd_val = sd(values, na.rm=TRUE)
    )
In [10]:
%%R
# Mean and standard deviation of Sensitivity for each method (NAs ignored).
df_byClass %>%
    dplyr::filter(variables == 'Sensitivity') %>%
    dplyr::group_by(method, variables) %>%
    dplyr::summarise(
        mean_val = mean(values, na.rm=TRUE),
        sd_val = sd(values, na.rm=TRUE)
    )
In [8]:
%%R
# Mean and SD of qSIP Specificity for each (shared_perc, perm_perc) setting,
# listed from the lowest mean to the highest.
df_byClass %>%
    filter(variables == 'Specificity',
           method=='qSIP') %>%
    group_by(method, variables, shared_perc, perm_perc) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    # summarize() leaves the result grouped; drop the grouping so the sort
    # below is over the whole table regardless of how arrange() treats groups.
    ungroup() %>%
    arrange(mean_val) %>%
    as.data.frame
In [9]:
%%R
# Mean and SD of qSIP Sensitivity for each (shared_perc, perm_perc) setting,
# listed from the lowest mean to the highest.
df_byClass %>%
    filter(variables == 'Sensitivity',
           method=='qSIP') %>%
    group_by(method, variables, shared_perc, perm_perc) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    # summarize() leaves the result grouped; drop the grouping so the sort
    # below is over the whole table regardless of how arrange() treats groups.
    ungroup() %>%
    arrange(mean_val) %>%
    as.data.frame
In [12]:
%%R
# Average gap in mean Specificity between DESeq2 and qSIP (DESeq2 minus qSIP),
# taken over every (shared_perc, perm_perc) combination.
df_byClass %>%
    # Restrict to the rows of interest before aggregating; the per-group means
    # for the two methods are the same as when filtering afterwards.
    dplyr::filter(variables %in% c('Specificity'),
                  method %in% c('qSIP', 'DESeq2')) %>%
    dplyr::group_by(method, variables, shared_perc, perm_perc) %>%
    dplyr::summarise(mean_val = mean(values, na.rm=TRUE)) %>%
    dplyr::ungroup() %>%
    # One column per method so the two means can be subtracted row-wise.
    tidyr::spread(method, mean_val) %>%
    dplyr::mutate(diff = DESeq2 - qSIP) %>%
    dplyr::arrange(diff) %>%
    dplyr::summarise(mean_diff = mean(diff)) %>%
    as.data.frame
In [ ]: