Goal

  • Get values ranges for microBetaDiv simulation run
    • values for MS

Setting paths


In [1]:
import os

# paths
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/'
buildDir = os.path.join(workDir, 'microBetaDiv')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'

fragFile = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/ampFrags_kde.pkl'
genome_index = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/genome_index.txt'

Init


In [2]:
import glob
import itertools
import nestly

In [3]:
%load_ext rpy2.ipython
%load_ext pushnote

In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)


/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘dplyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:stats’:

    filter, lag


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘gridExtra’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:dplyr’:

    combine


  res = super(Function, self).__call__(*new_args, **new_kwargs)

Getting results


In [5]:
F = os.path.join(buildDir, '*-cMtx_byClass.txt')
files = glob.glob(F)
files


Out[5]:
['/home/nick/notebook/SIPSim/dev/bac_genome1147/microBetaDiv/DESeq2-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/microBetaDiv/DESeq2_l2fcCut0.5-multi-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/microBetaDiv/qSIP-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/microBetaDiv/heavy-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/microBetaDiv/DESeq2_multi-cMtx_byClass.txt']

In [6]:
%%R -i files

df_byClass = list()
for (f in files){
    ff = strsplit(f, '/') %>% unlist
    fff = ff[length(ff)]
    df_byClass[[fff]] = read.delim(f, sep='\t')
}

df_byClass = do.call(rbind, df_byClass)
df_byClass$file = gsub('\\.[0-9]+$', '', rownames(df_byClass))
df_byClass$method = gsub('-.+', '', df_byClass$file)
rownames(df_byClass) = 1:nrow(df_byClass)
df_byClass = filter(df_byClass, perm_perc <= 20)

df_byClass %>% head(n=3)


  library      variables    values shared_perc perm_perc rep
1       2    Sensitivity 0.5940594          80        20   7
2       2    Specificity 0.9168421          80        20   7
3       2 Pos Pred Value 0.4316547          80        20   7
                     file method
1 DESeq2-cMtx_byClass.txt DESeq2
2 DESeq2-cMtx_byClass.txt DESeq2
3 DESeq2-cMtx_byClass.txt DESeq2

min/max


In [7]:
%%R
df_byClass %>%
    filter(variables == 'Specificity') %>%
    group_by(method, variables) %>%
    summarize(min_val = min(values, na.rm=TRUE),
              max_val = max(values, na.rm=TRUE))


Source: local data frame [5 x 4]
Groups: method [?]

             method   variables   min_val   max_val
              <chr>      <fctr>     <dbl>     <dbl>
1            DESeq2 Specificity 0.8761805 1.0000000
2 DESeq2_l2fcCut0.5 Specificity 0.8547190 1.0000000
3      DESeq2_multi Specificity 0.8380567 1.0000000
4             heavy Specificity 0.2867470 0.6331658
5              qSIP Specificity 0.6984318 0.9818731

In [8]:
%%R
df_byClass %>%
    filter(variables == 'Sensitivity') %>%
    group_by(method, variables) %>%
    summarize(min_val = min(values, na.rm=TRUE),
              max_val = max(values, na.rm=TRUE))


Source: local data frame [5 x 4]
Groups: method [?]

             method   variables   min_val   max_val
              <chr>      <fctr>     <dbl>     <dbl>
1            DESeq2 Sensitivity 0.4368932 0.9036145
2 DESeq2_l2fcCut0.5 Sensitivity 0.7169811 1.0000000
3      DESeq2_multi Sensitivity 0.7373737 1.0000000
4             heavy Sensitivity 0.7619048 1.0000000
5              qSIP Sensitivity 0.5377358 1.0000000

mean +/- s.d.


In [9]:
%%R
df_byClass %>%
    filter(variables == 'Specificity') %>%
    group_by(method, variables) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE))


Source: local data frame [5 x 4]
Groups: method [?]

             method   variables  mean_val     sd_val
              <chr>      <fctr>     <dbl>      <dbl>
1            DESeq2 Specificity 0.9436168 0.02409554
2 DESeq2_l2fcCut0.5 Specificity 0.9376568 0.03057763
3      DESeq2_multi Specificity 0.9283780 0.03370445
4             heavy Specificity 0.4829208 0.05901246
5              qSIP Specificity 0.9002039 0.05484518

In [10]:
%%R
df_byClass %>%
    filter(variables == 'Sensitivity') %>%
    group_by(method, variables) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE))


Source: local data frame [5 x 4]
Groups: method [?]

             method   variables  mean_val     sd_val
              <chr>      <fctr>     <dbl>      <dbl>
1            DESeq2 Sensitivity 0.6444863 0.08562299
2 DESeq2_l2fcCut0.5 Sensitivity 0.8623038 0.07178682
3      DESeq2_multi Sensitivity 0.8670507 0.06916366
4             heavy Sensitivity 0.8843186 0.06473706
5              qSIP Sensitivity 0.7374046 0.11817601

mean +/- s.d. (by params)


In [8]:
%%R
df_byClass %>%
    filter(variables == 'Specificity',
           method=='qSIP') %>%
    group_by(method, variables, shared_perc, perm_perc) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    arrange(mean_val) %>%
    as.data.frame


   method   variables shared_perc perm_perc  mean_val     sd_val
1    qSIP Specificity         100        15 0.8578252 0.05452097
2    qSIP Specificity         100        10 0.8610767 0.07027121
3    qSIP Specificity         100        20 0.8696673 0.04016473
4    qSIP Specificity          95        10 0.8750393 0.05087529
5    qSIP Specificity          90        20 0.8864501 0.04312228
6    qSIP Specificity         100         5 0.8870155 0.07076713
7    qSIP Specificity          95        15 0.8899009 0.04000713
8    qSIP Specificity          85        10 0.8921552 0.05463595
9    qSIP Specificity          80        10 0.8933374 0.06100142
10   qSIP Specificity          90         5 0.8945461 0.05550165
11   qSIP Specificity          95         5 0.8952365 0.07230950
12   qSIP Specificity          85        20 0.8961290 0.05672697
13   qSIP Specificity          90        15 0.8980968 0.06084383
14   qSIP Specificity          80         5 0.9054185 0.04751081
15   qSIP Specificity          85        15 0.9072644 0.04392689
16   qSIP Specificity          95        20 0.9078017 0.06382407
17   qSIP Specificity          80        20 0.9119918 0.04781756
18   qSIP Specificity          95         0 0.9121775 0.03901323
19   qSIP Specificity         100         0 0.9152231 0.04975427
20   qSIP Specificity          90         0 0.9202989 0.04838376
21   qSIP Specificity          80        15 0.9217062 0.04433629
22   qSIP Specificity          90        10 0.9246495 0.04682681
23   qSIP Specificity          85         5 0.9251176 0.03179668
24   qSIP Specificity          80         0 0.9276176 0.03548019
25   qSIP Specificity          85         0 0.9293541 0.04049040

In [9]:
%%R
df_byClass %>%
    filter(variables == 'Sensitivity',
           method=='qSIP') %>%
    group_by(method, variables, shared_perc, perm_perc) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    arrange(mean_val) %>%
    as.data.frame


   method   variables shared_perc perm_perc  mean_val     sd_val
1    qSIP Sensitivity          80        15 0.6179407 0.02374451
2    qSIP Sensitivity          80        20 0.6331728 0.04369966
3    qSIP Sensitivity          80        10 0.6348964 0.03625015
4    qSIP Sensitivity          85         0 0.6457721 0.03598500
5    qSIP Sensitivity          85        10 0.6459374 0.05489723
6    qSIP Sensitivity          80         5 0.6486920 0.04008792
7    qSIP Sensitivity          80         0 0.6503242 0.04281886
8    qSIP Sensitivity          85         5 0.6569599 0.04067362
9    qSIP Sensitivity          85        15 0.6651051 0.02761506
10   qSIP Sensitivity          85        20 0.6666168 0.04769413
11   qSIP Sensitivity          90         5 0.6947537 0.01960167
12   qSIP Sensitivity          90        15 0.7030872 0.02989937
13   qSIP Sensitivity          90        20 0.7080290 0.03330200
14   qSIP Sensitivity          90         0 0.7103585 0.03044823
15   qSIP Sensitivity          90        10 0.7140962 0.05075187
16   qSIP Sensitivity          95         5 0.7173819 0.03749605
17   qSIP Sensitivity          95        15 0.7238655 0.04690307
18   qSIP Sensitivity          95        20 0.7348206 0.02857758
19   qSIP Sensitivity          95         0 0.7556593 0.04552502
20   qSIP Sensitivity          95        10 0.7597457 0.03589931
21   qSIP Sensitivity         100         5 0.9406894 0.02736742
22   qSIP Sensitivity         100        15 0.9442823 0.03504697
23   qSIP Sensitivity         100        10 0.9474809 0.01864351
24   qSIP Sensitivity         100        20 0.9506753 0.02259105
25   qSIP Sensitivity         100         0 0.9647720 0.02194082

Difference between qSIP and MW-HR-SIP


In [12]:
%%R
df_byClass %>%
    dplyr::select(-file) %>%
    group_by(method, variables, shared_perc, perm_perc) %>%
    summarize(mean_val = mean(values, na.rm=TRUE)) %>%
    ungroup() %>%
    filter(variables %in% c('Specificity'),
           method %in% c('qSIP', 'DESeq2')) %>%
    spread(method, mean_val) %>% 
    mutate(diff = DESeq2 - qSIP) %>%
    arrange(diff) %>%
    summarize(mean_diff = mean(diff)) %>%
    as.data.frame


   mean_diff
1 0.04341295

In [ ]: