Goal

  • Get values ranges for atomIncorp_taxaIncorp simulation run
    • values for MS

Setting paths


In [2]:
# paths
import os

workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/'
buildDir = os.path.join(workDir, 'atomIncorp_taxaIncorp')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'

fragFile = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/ampFrags_kde.pkl'
genome_index = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/genome_index.txt'

Init


In [3]:
import glob
import itertools
import nestly

In [4]:
%load_ext rpy2.ipython
%load_ext pushnote

In [5]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)


/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘dplyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:stats’:

    filter, lag


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘gridExtra’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:dplyr’:

    combine


  res = super(Function, self).__call__(*new_args, **new_kwargs)

Getting results


In [6]:
F = os.path.join(buildDir, '*-cMtx_byClass.txt')
files = glob.glob(F)
files


Out[6]:
['/home/nick/notebook/SIPSim/dev/bac_genome1147/atomIncorp_taxaIncorp/DESeq2-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/atomIncorp_taxaIncorp/qSIP-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/atomIncorp_taxaIncorp/heavy-cMtx_byClass.txt',
 '/home/nick/notebook/SIPSim/dev/bac_genome1147/atomIncorp_taxaIncorp/DESeq2_multi-cMtx_byClass.txt']

In [7]:
%%R -i files

df_byClass = list()
for (f in files){
    ff = strsplit(f, '/') %>% unlist
    fff = ff[length(ff)]
    df_byClass[[fff]] = read.delim(f, sep='\t')
}

df_byClass = do.call(rbind, df_byClass)
df_byClass$file = gsub('\\.[0-9]+$', '', rownames(df_byClass))
df_byClass$method = gsub('-.+', '', df_byClass$file)
rownames(df_byClass) = 1:nrow(df_byClass)

df_byClass %>% head(n=3)


  library      variables    values percIncorp percTaxa rep
1       2    Sensitivity 0.2805755         50       50   7
2       2    Specificity 1.0000000         50       50   7
3       2 Pos Pred Value 1.0000000         50       50   7
                     file method
1 DESeq2-cMtx_byClass.txt DESeq2
2 DESeq2-cMtx_byClass.txt DESeq2
3 DESeq2-cMtx_byClass.txt DESeq2

min/max


In [12]:
%%R
df_byClass %>%
    filter(variables == 'Specificity') %>%
    group_by(method, variables) %>%
    summarize(min_val = min(values, na.rm=TRUE),
              max_val = max(values, na.rm=TRUE))


Source: local data frame [4 x 4]
Groups: method [?]

        method   variables   min_val   max_val
         <chr>      <fctr>     <dbl>     <dbl>
1       DESeq2 Specificity 0.9984301 1.0000000
2 DESeq2_multi Specificity 0.9870490 1.0000000
3        heavy Specificity 0.1098004 0.6853767
4         qSIP Specificity 0.6666667 0.9813737

In [10]:
%%R
df_byClass %>%
    filter(variables == 'Sensitivity') %>%
    group_by(method, variables) %>%
    summarize(min_val = min(values, na.rm=TRUE),
              max_val = max(values, na.rm=TRUE))


Source: local data frame [4 x 4]
Groups: method [?]

        method   variables    min_val max_val
         <chr>      <fctr>      <dbl>   <dbl>
1       DESeq2 Sensitivity 0.00000000       1
2 DESeq2_multi Sensitivity 0.01860465       1
3        heavy Sensitivity 0.72727273       1
4         qSIP Sensitivity 0.54545455       1

mean +/- s.d.


In [9]:
%%R
df_byClass %>%
    filter(variables == 'Specificity') %>%
    group_by(method, variables) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE))


Source: local data frame [4 x 4]
Groups: method [?]

        method   variables  mean_val       sd_val
         <chr>      <fctr>     <dbl>        <dbl>
1       DESeq2 Specificity 0.9999898 0.0001166902
2 DESeq2_multi Specificity 0.9993969 0.0016998115
3        heavy Specificity 0.2786951 0.1643863872
4         qSIP Specificity 0.8838634 0.0586022230

In [10]:
%%R
df_byClass %>%
    filter(variables == 'Sensitivity') %>%
    group_by(method, variables) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE))


Source: local data frame [4 x 4]
Groups: method [?]

        method   variables  mean_val     sd_val
         <chr>      <fctr>     <dbl>      <dbl>
1       DESeq2 Sensitivity 0.4724049 0.31490068
2 DESeq2_multi Sensitivity 0.6629941 0.29586940
3        heavy Sensitivity 0.9245333 0.06439925
4         qSIP Sensitivity 0.9095945 0.05959184

mean +/- s.d. (by params)


In [15]:
%%R
df_byClass %>%
    filter(variables == 'Specificity',
           method=='qSIP') %>%
    group_by(method, variables, percIncorp, percTaxa) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    arrange(mean_val) %>%
    as.data.frame


   method   variables percIncorp percTaxa  mean_val     sd_val
1    qSIP Specificity        100        1 0.8582665 0.06679054
2    qSIP Specificity         25       10 0.8606177 0.06864633
3    qSIP Specificity         25       25 0.8656590 0.05045378
4    qSIP Specificity          0       50 0.8698730 0.07816412
5    qSIP Specificity         15       50 0.8703427 0.04429995
6    qSIP Specificity          0        5 0.8721416 0.05364967
7    qSIP Specificity        100       50 0.8754534 0.07077869
8    qSIP Specificity        100        5 0.8756387 0.09349809
9    qSIP Specificity        100       25 0.8767710 0.05752558
10   qSIP Specificity         25       50 0.8795854 0.06877459
11   qSIP Specificity         50        5 0.8796246 0.06520878
12   qSIP Specificity          0       10 0.8818512 0.05212165
13   qSIP Specificity         15        5 0.8833004 0.04326785
14   qSIP Specificity         25        5 0.8836289 0.06543908
15   qSIP Specificity         25        1 0.8844525 0.06073938
16   qSIP Specificity         50       10 0.8846595 0.03504214
17   qSIP Specificity          0       25 0.8865699 0.03826421
18   qSIP Specificity         15        1 0.8881183 0.06476368
19   qSIP Specificity         50       25 0.8954663 0.05564128
20   qSIP Specificity          0        1 0.8970962 0.03542797
21   qSIP Specificity         15       10 0.8984085 0.03882326
22   qSIP Specificity         50       50 0.9004672 0.06018125
23   qSIP Specificity         15       25 0.9026506 0.05825303
24   qSIP Specificity        100       10 0.9117078 0.03659808
25   qSIP Specificity         50        1 0.9142342 0.03165473

In [16]:
%%R
df_byClass %>%
    filter(variables == 'Sensitivity',
           method=='qSIP') %>%
    group_by(method, variables, percIncorp, percTaxa) %>%
    summarize(mean_val = mean(values, na.rm=TRUE),
              sd_val = sd(values, na.rm=TRUE)) %>%
    arrange(mean_val) %>%
    as.data.frame


   method   variables percIncorp percTaxa  mean_val      sd_val
1    qSIP Sensitivity         15       25 0.8561337 0.028905507
2    qSIP Sensitivity         15        5 0.8622943 0.030733507
3    qSIP Sensitivity         15       50 0.8653961 0.015116509
4    qSIP Sensitivity         15       10 0.8665195 0.034107758
5    qSIP Sensitivity         15        1 0.8854545 0.116606330
6    qSIP Sensitivity         50        1 0.8945455 0.142177024
7    qSIP Sensitivity         25       10 0.8958477 0.031271014
8    qSIP Sensitivity         25       50 0.9050348 0.016293899
9    qSIP Sensitivity         25        5 0.9050976 0.041106911
10   qSIP Sensitivity         25       25 0.9075375 0.013894165
11   qSIP Sensitivity         50       50 0.9170469 0.018121752
12   qSIP Sensitivity         50        5 0.9170827 0.032738165
13   qSIP Sensitivity         50       25 0.9188085 0.010369028
14   qSIP Sensitivity         25        1 0.9345455 0.083912587
15   qSIP Sensitivity        100       50 0.9345952 0.010085473
16   qSIP Sensitivity        100       10 0.9390744 0.006752672
17   qSIP Sensitivity         50       10 0.9403575 0.019146335
18   qSIP Sensitivity        100       25 0.9437450 0.013289597
19   qSIP Sensitivity        100        5 0.9438842 0.026476977
20   qSIP Sensitivity        100        1 0.9588889 0.068564929
21   qSIP Sensitivity          0        1       NaN         NaN
22   qSIP Sensitivity          0        5       NaN         NaN
23   qSIP Sensitivity          0       10       NaN         NaN
24   qSIP Sensitivity          0       25       NaN         NaN
25   qSIP Sensitivity          0       50       NaN         NaN

Difference between qSIP and MW-HR-SIP


In [33]:
%%R
df_byClass %>%
    dplyr::select(-file) %>%
    group_by(method, variables, percIncorp, percTaxa) %>%
    summarize(mean_val = mean(values, na.rm=TRUE)) %>%
    ungroup() %>%
    filter(variables %in% c('Specificity'),
           method %in% c('qSIP', 'DESeq2')) %>%
    spread(method, mean_val) %>% 
    mutate(diff = DESeq2 - qSIP) %>%
    summarize(mean_diff = mean(diff)) %>%
    as.data.frame


  mean_diff
1 0.1161264

In [ ]: