Regional data
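Need to generate, for each simulated scenario: a concatenated gag/pol/env alignment (`.fas`), a partition file describing the gene boundaries within that alignment, and a pol-only alignment.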
In [2]:
%load_ext rpy2.ipython
%Rdevice svg
In [3]:
%%R
library(ape)
library(magrittr)
library(phangorn)
library(adephylo)
In [4]:
%%R
# Directory holding the raw regional simulation output (relative path).
regionaldir <- "../rawdata/Regional"
# One stub per simulated scenario (A-D); each stub names both the scenario's
# sub-directory and, after replacing "SEQ" with a gene name, its FASTA files.
stubs <- paste0(
  "150129_PANGEAsim_Regional_FirstObj_sc",
  c("A", "B", "C", "D"),
  "_SIMULATED_SEQ"
)
# Number of scenarios to process.
numsc <- length(stubs)
In [5]:
%%R
# Build one concatenated alignment per scenario by reading the gag, pol and
# env FASTA files, sorting each by sequence name, and binding the columns in
# that gene order. The result for scenario i is stored in seqdata[[i]].
genes <- c("gag", "pol", "env")
seqdata <- vector("list", numsc)
for (i in seq_len(numsc)) {
  s <- NULL
  for (j in seq_along(genes)) {
    # File is <regionaldir>/<stub>/<stub with "SEQ" replaced by the gene>.fa
    genefile <- file.path(
      regionaldir,
      stubs[i],
      paste0(gsub("SEQ", genes[j], stubs[i], fixed = TRUE), ".fa")
    )
    s2 <- read.dna(genefile, format = "fasta", as.matrix = TRUE)
    # Sort rows by name so that the same taxon sits on the same row in
    # every gene before the columns are bound together.
    s2 <- s2[order(row.names(s2)), ]
    if (is.null(s)) {
      # First gene: start the concatenated alignment and remember its names.
      s <- s2
      snames <- row.names(s)
    } else {
      # Rows are bound by position, so mismatching names would silently
      # join sequences from different taxa; flag that instead of hiding it.
      if (!identical(row.names(s2), snames)) {
        warning(
          "Sequence names in ", genefile,
          " do not match those of the ", genes[1], " alignment; ",
          "rows are concatenated by position.",
          call. = FALSE
        )
      }
      s <- cbind(s, s2)
    }
  }
  seqdata[[i]] <- s
}
In [6]:
%%R
# Write each scenario's concatenated alignment to "<stub>.fas" in the working
# directory, as FASTA with every sequence on a single unbroken line
# (nbcol = -1, no column separator).
seqnames.fn <- paste0(stubs, ".fas")
for (i in seq_len(numsc)) {
  write.dna(
    seqdata[[i]],
    seqnames.fn[i],
    format = "fasta",
    nbcol = -1,
    colsep = ""
  )
}
In [7]:
# Partition definitions: column ranges of each gene within the concatenated
# gag+pol+env alignment written above.
# NOTE(review): the ranges are hard-coded (gag 1440, pol 2844, env 2523
# columns); confirm they match the actual alignment lengths.
s="""DNA, gag = 1-1440
DNA, pol = 1441-4284
DNA, env = 4285-6807\n"""
# Use a context manager so the file is closed even if the write fails.
with open("regional_partition",'w') as f:
    f.write(s)
Also copy over the pol-only sequences (one FASTA file per scenario, written to the working directory).
In [8]:
%%R
# For each scenario, read the pol alignment from the raw data directory and
# write it to "<stub with SEQ replaced by pol>.fas" in the working directory.
for (i in seq_len(numsc)) {
  polstub <- gsub("SEQ", "pol", stubs[i], fixed = TRUE)
  s <- read.dna(
    file.path(regionaldir, stubs[i], paste0(polstub, ".fa")),
    format = "fasta",
    as.matrix = TRUE
  )
  # Sort rows by sequence name. The ordering was previously computed but
  # never applied, so the file was written in its original order, unlike the
  # concatenated alignment, which is sorted by name.
  snames <- row.names(s)
  o <- order(snames)
  snames <- snames[o]
  s <- s[o, ]
  write.dna(
    s,
    paste0(polstub, ".fas"),
    format = "fasta",
    nbcol = -1,
    colsep = ""
  )
}