Regional data

40000 individuals


In [1]:
%load_ext rpy2.ipython
%Rdevice svg

In [2]:
%%R
library(ape)
library(magrittr)
library(phangorn)
library(adephylo)


Loading required package: ade4

Attaching package: ‘adephylo’

The following object is masked from ‘package:ade4’:

    orthogram


In [3]:
%%R
regionaldir <- "../rawdata/november2014/HPTN071"
stubs <- c("281014_HPTN071_scA_rep1_SIMULATED","281014_HPTN071_scB_rep1_SIMULATED","281014_HPTN071_scC_rep1_SIMULATED")
numsc <- length(stubs)

In [4]:
%%R
genes <- c("gag","pol","env")
seqdata <- list()
for(i in 1:numsc){
    for(j in 1:length(genes)){
        if(j==1){
            s <- read.dna(paste(regionaldir,"/",stubs[i],"/",stubs[i],"_",genes[j],".fa",sep=""),format="fasta",as.matrix=TRUE)
            snames <- row.names(s)
            o <- order(snames)
            snames <- snames[o]
            s <- s[o,]
        }else{
            s2 <- read.dna(paste(regionaldir,"/",stubs[i],"/",stubs[i],"_",genes[j],".fa",sep=""),format="fasta",as.matrix=TRUE)
            s2names <- row.names(s2)
            o <- order(s2names)
            s2names <- s2names[o]
            s <- cbind(s,s2[o,])
        }
    }
    seqdata[[i]] <- s
}

In [5]:
%%R
seqnames.fn <- paste(stubs,".fas",sep="")
for(i in 1:numsc){
    write.dna(seqdata[[i]],seqnames.fn[i],format="fasta",nbcol=-1,colsep="")
}

In [49]:
s="""DNA, gag = 1-1440
DNA, pol = 1441-4284
DNA, env = 4285-6807\n"""
f=open("regional_partition",'w')
f.write(s)
f.close()