In [1]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/multiWindow_HRSIP/'
genomeDir = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/'
figureDir = '/home/nick/notebook/SIPSim/figures/bac_genome_n1147/'
In [2]:
import glob
import nestly
import os
%load_ext rpy2.ipython
%load_ext pushnote
In [3]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [4]:
if not os.path.isdir(workDir):
os.makedirs(workDir)
%cd $workDir
In [5]:
def HRSIP_multi_window(physeq, BD_shift, BDs, outname, padj=0.1, log2=0.25):
# HR-SIP for each window
occurs = ','.join([str(x/100.0) for x in range(0,55,5)])
outname2 = outname + '_DS2.txt'
!SIPSimR phyloseq_DESeq2 --occur_all $occurs -w $BDs $physeq > $outname2
# making confusion matrix
!SIPSimR DESeq2_confuseMtx --padj $padj --log2 $log2 $BD_shift -o $outname $outname2
In [ ]:
BDs = '1.70-1.74,1.72-1.76'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW1')
In [ ]:
BDs = '1.70-1.75,1.72-1.77'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW2')
In [ ]:
BDs = '1.70-1.73,1.72-1.75,1.74-1.77'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW3')
In [ ]:
BDs = '1.70-1.72,1.71-1.73,1.72-1.74,1.73-1.75'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW4')
In [ ]:
BDs = '1.71-1.75'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW5')
In [ ]:
BDs = '1.71-1.8'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW6')
In [ ]:
files = ['OTU_n2_abs1e9_PCR_subNorm_MW{}_byClass.txt'.format(i) for i in xrange(1,7)]
files
In [ ]:
%%R -i files
renames = data.frame(file = c('MW1', 'MW2', 'MW3', 'MW4', 'MW5', 'MW6'),
BD_window = c('1.70-1.74,\n1.72-1.76',
'1.70-1.75,\n1.72-1.77',
'1.70-1.73,\n1.72-1.75,\n1.74-1.77',
'1.70-1.72,\n1.71-1.73,\n1.72-1.74,\n1.73-1.75',
'1.71-1.75', '1.71-1.8'))
df = list()
for(F in files){
tmp = read.delim(F, sep='\t')
df[[F]] = tmp
}
df = do.call(rbind, df)
df$file = gsub('.+(MW[0-9]+).+', '\\1', rownames(df))
df = inner_join(df, renames, c('file' = 'file'))
df %>% head(n=3)
In [ ]:
%%R -h 550
ggplot(df, aes(variables, values)) +
geom_bar(stat='identity') +
labs(y='Value') +
facet_grid(BD_window ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.x = element_blank(),
axis.text.x = element_text(angle=45, hjust=1)
)
In [ ]:
%%R -h 250 -w 550
df.f = df %>%
filter(variables %in% c('Sensitivity', 'Specificity', 'Balanced Accuracy')) %>%
mutate(variables = gsub(' ', '\n', variables))
ggplot(df.f, aes(BD_window, values, color=variables)) +
geom_point(size=3, alpha=0.7) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.x = element_blank(),
axis.text.x = element_text(angle=60, hjust=1)
)
In [ ]:
%pushnote validation MW-HR-SIP complete
In [ ]:
def HRSIP_multi_window_post(physeq, BD_shift, BDs, outname, padj=0.1, log2=0.25):
# HR-SIP for each window
occurs = ','.join([str(x/100.0) for x in range(0,55,5)])
outname2 = outname + '_DS2.txt'
!SIPSimR phyloseq_DESeq2 --occur_heavy $occurs -w $BDs $physeq > $outname2
# making confusion matrix
!SIPSimR DESeq2_confuseMtx --padj $padj --log2 $log2 $BD_shift -o $outname $outname2
In [ ]:
BDs = '1.70-1.74,1.72-1.76'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window_post(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW1p')
In [ ]:
BDs = '1.70-1.75,1.72-1.77'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window_post(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW2p')
In [ ]:
BDs = '1.70-1.73,1.72-1.75,1.74-1.77'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window_post(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW3p')
In [ ]:
BDs = '1.70-1.72,1.71-1.73,1.72-1.74,1.73-1.75'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window_post(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW4p')
In [ ]:
BDs = '1.71-1.75'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window_post(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW5p')
In [ ]:
BDs = '1.71-1.8'
physeq = '../OTU_n2_abs1e9_PCR_subNorm.physeq'
BD_shift = '../ampFrags_BD-shift.txt'
HRSIP_multi_window_post(physeq, BD_shift, BDs, 'OTU_n2_abs1e9_PCR_subNorm_MW6p')
In [ ]:
files = ['OTU_n2_abs1e9_PCR_subNorm_MW{}p_byClass.txt'.format(i) for i in xrange(1,7)]
files
In [ ]:
%%R -i files
renames = data.frame(file = c('MW1', 'MW2', 'MW3', 'MW4', 'MW5', 'MW6'),
BD_window = c('1.70-1.74,\n1.72-1.76',
'1.70-1.75,\n1.72-1.77',
'1.70-1.73,\n1.72-1.75,\n1.74-1.77',
'1.70-1.72,\n1.71-1.73,\n1.72-1.74,\n1.73-1.75',
'1.71-1.75', '1.71-1.8'))
df = list()
for(F in files){
tmp = read.delim(F, sep='\t')
df[[F]] = tmp
}
df = do.call(rbind, df)
df$file = gsub('.+(MW[0-9]+).+', '\\1', rownames(df))
df = inner_join(df, renames, c('file' = 'file'))
df %>% head(n=3)
In [ ]:
%%R -h 550
ggplot(df, aes(variables, values)) +
geom_bar(stat='identity') +
labs(y='Value') +
facet_grid(BD_window ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.x = element_blank(),
axis.text.x = element_text(angle=45, hjust=1)
)
In [ ]:
%%R -h 250 -w 550
df.f = df %>%
filter(variables %in% c('Sensitivity', 'Specificity', 'Balanced Accuracy')) %>%
mutate(variables = gsub(' ', '\n', variables))
ggplot(df.f, aes(BD_window, values, color=variables)) +
geom_point(size=3, alpha=0.7) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.x = element_blank(),
axis.text.x = element_text(angle=60, hjust=1)
)
In [ ]:
%pushnote validation MW-HR-SIP complete
In [ ]: