In [1]:
# Working directory for this notebook: later cells `cd` here before running
# `CLdb -- spacersShared`, and the R cells read `shared_byTaxon.txt` from it.
# NOTE: this is a machine-specific absolute path.
## CHANGE THIS!
workDir = "/home/nyoungb2/t/CLdb_Ecoli/spacers_shared/"
In [2]:
import os
from IPython.display import FileLinks
%load_ext rpy2.ipython
In [13]:
%%R
library(dplyr)
library(tidyr)
library(ggplot2)
In [3]:
# Create the working directory (and any missing parent directories).
# exist_ok=True makes this cell safe to re-run and removes the gap between a
# separate isdir() check and the creation call; an error is still raised if
# the path exists but is not a directory.
os.makedirs(workDir, exist_ok=True)
In [4]:
# Sanity check before any analysis: this shell call fails if the `CLdb`
# executable is not on $PATH. Per the original note it also confirms that the
# ~/.CLdb config file is set up (the parameters it prints are not used later).
!CLdb --config-params
In [5]:
# Show the usage text of the `spacersShared` subcommand (assuming `-h` is its
# help flag). `$workDir` is filled in by IPython from the Python variable
# before the line is handed to the shell.
!cd $workDir; \
CLdb -- spacersShared -h
Let's try the default table
In [9]:
# Run `spacersShared` with its default options from inside workDir.
# The output is piped through `head`, so only the first lines are displayed.
!cd $workDir; \
CLdb -- spacersShared | head
Let's show totals for each taxon
In [11]:
# Same command with `-name` added (presumably this switches the table to
# per-taxon columns -- confirm against the `-h` output above).
# Again only the first lines are shown via `head`.
!cd $workDir; \
CLdb -- spacersShared -name | head
As you can see, not every genome contains all of the same unique spacer sequences
Let's plot this
In [12]:
# Save the `-name -long` table (presumably long format -- one row per
# taxon/spacer-cluster pair) as shared_byTaxon.txt inside workDir, where the
# following R cell reads it.
# `&&` (instead of `;`) aborts if the `cd` fails, so the file is never written
# to the notebook's current directory by mistake; the quotes keep a path that
# contains spaces in one piece.
!cd "$workDir" && \
CLdb -- spacersShared -name -long > shared_byTaxon.txt
In [16]:
%%R -i workDir
# `-i workDir` copies the Python variable into the R session.
# Read the tab-delimited table written by the previous cell and preview it.
infile <- file.path(workDir, 'shared_byTaxon.txt')
df <- read.delim(infile, sep='\t')
head(df)
In [21]:
%%R
# Heatmap of the table loaded above: one tile per (group_ID, Spacer_cluster)
# pair, filled according to `count`.
ggplot(df, aes(x=group_ID, y=Spacer_cluster, fill=count)) +
    geom_tile() +
    # zero expansion on both axes so the tiles sit flush against the panel edge
    scale_y_continuous(expand=c(0,0)) +
    scale_x_discrete(expand=c(0,0)) +
    ylab('unique spacer sequence') +
    theme_bw() +
    # applied after theme_bw() so these settings are not reset by it
    theme(text = element_text(size=16),
          axis.title.x = element_blank(),
          axis.text.x = element_text(angle=60, hjust=1))
In [ ]: