Regional data
40000 individuals
In [1]:
%load_ext rpy2.ipython
%Rdevice svg
In [2]:
%%R
library(ape)
library(magrittr)
library(phangorn)
library(adephylo)
In [10]:
%%R
regionaldirstub <- "../rawdata/november2014/HPTN071/replicates"
regionaldirs <- list.dirs(regionaldirstub)
regionaldirs <- regionaldirs[2:length(regionaldirs)]
stubs <- strsplit(regionaldirs,"/") %>% lapply(.,tail,1) %>% unlist
numsc <- length(stubs)
In [13]:
%%R
genes <- c("gag","pol","env")
seqdata <- list()
for(i in 1:numsc){
for(j in 1:length(genes)){
if(j==1){
s <- read.dna(paste(regionaldirs[i],"/",stubs[i],"_",genes[j],".fa",sep=""),format="fasta",as.matrix=TRUE)
snames <- row.names(s)
o <- order(snames)
snames <- snames[o]
s <- s[o,]
}else{
s2 <- read.dna(paste(regionaldirs[i],"/",stubs[i],"_",genes[j],".fa",sep=""),format="fasta",as.matrix=TRUE)
s2names <- row.names(s2)
o <- order(s2names)
s2names <- s2names[o]
s <- cbind(s,s2[o,])
}
}
seqdata[[i]] <- s
}
In [14]:
%%R
seqnames.fn <- paste(stubs,".fas",sep="")
for(i in 1:numsc){
write.dna(seqdata[[i]],seqnames.fn[i],format="fasta",nbcol=-1,colsep="")
}