Regional data

Alignments to generate for each simulated scenario:

  • Pol only
  • Gag/pol/env

In [2]:
%load_ext rpy2.ipython
%Rdevice svg

In [3]:
%%R
library(ape)
library(magrittr)
library(phangorn)
library(adephylo)


Loading required package: ade4

Attaching package: ‘adephylo’

The following object is masked from ‘package:ade4’:

    orthogram


In [4]:
%%R
# Directory containing one sub-folder of simulated FASTA files per scenario.
regionaldir <- "../rawdata/Regional"

# Scenario stubs. Each stub names a sub-folder of `regionaldir`; replacing
# "SEQ" in the stub with a gene name gives the FASTA file name inside it.
stubs <- c(
    "150129_PANGEAsim_Regional_FirstObj_scA_SIMULATED_SEQ",
    "150129_PANGEAsim_Regional_FirstObj_scB_SIMULATED_SEQ",
    "150129_PANGEAsim_Regional_FirstObj_scC_SIMULATED_SEQ",
    "150129_PANGEAsim_Regional_FirstObj_scD_SIMULATED_SEQ"
)

# Number of scenarios to process.
numsc <- length(stubs)

In [5]:
%%R
# Genes to concatenate, in the column order of the combined alignment.
genes <- c("gag", "pol", "env")

# Read the FASTA alignment of one gene for one scenario stub and return it as
# a DNAbin matrix whose rows are sorted by sequence name.
read_gene_alignment <- function(stub, gene) {
    fasta_path <- file.path(
        regionaldir,
        stub,
        paste0(gsub("SEQ", gene, stub, fixed = TRUE), ".fa")
    )
    aln <- read.dna(fasta_path, format = "fasta", as.matrix = TRUE)
    aln[order(row.names(aln)), ]
}

# One concatenated gag/pol/env alignment per scenario.
# seq_len()/seq_along() are used so that an empty `stubs` or `genes` results
# in zero iterations instead of the c(1, 0) sequence produced by 1:0.
seqdata <- vector("list", numsc)
for (i in seq_len(numsc)) {
    s <- NULL
    for (j in seq_along(genes)) {
        gene_aln <- read_gene_alignment(stubs[i], genes[j])
        if (is.null(s)) {
            # First gene: start the combined alignment.
            s <- gene_aln
        } else {
            # Later genes: append their columns to the right.
            # NOTE(review): this relies on every gene file holding the same
            # set of sequence names — confirm against the input data.
            s <- cbind(s, gene_aln)
        }
    }
    seqdata[[i]] <- s
}

In [6]:
%%R
# Output file names for the concatenated alignments: one ".fas" per scenario,
# written to the current working directory.
seqnames.fn <- paste0(stubs, ".fas")

# Write each concatenated alignment as FASTA. nbcol = -1 with colsep = ""
# is passed through to write.dna unchanged from the original call.
# seq_len() avoids iterating over c(1, 0) when there are no scenarios.
for (i in seq_len(numsc)) {
    write.dna(
        seqdata[[i]],
        seqnames.fn[i],
        format = "fasta",
        nbcol = -1,
        colsep = ""
    )
}

In [7]:
# Partition definition for the concatenated gag/pol/env alignment: one line
# per gene giving its column range.
# NOTE(review): the coordinates are hard-coded; they presumably match the
# column widths of the gag, pol and env alignments concatenated above —
# verify if the input alignments change.
s="""DNA, gag = 1-1440
DNA, pol = 1441-4284
DNA, env = 4285-6807\n"""

# Use a context manager so the file is closed even if the write fails.
with open("regional_partition",'w') as f:
    f.write(s)

Also write out the pol-only sequences for each scenario


In [8]:
%%R
# Write a pol-only FASTA file (".fas") for each scenario into the current
# working directory.
# seq_len() avoids iterating over c(1, 0) when there are no scenarios.
for (i in seq_len(numsc)) {
    # File stem of the pol alignment: the scenario stub with "SEQ" -> "pol".
    pol_stub <- gsub("SEQ", "pol", stubs[i], fixed = TRUE)
    s <- read.dna(
        file.path(regionaldir, stubs[i], paste0(pol_stub, ".fa")),
        format = "fasta",
        as.matrix = TRUE
    )
    # Sort rows by sequence name before writing, so the pol-only files use
    # the same row order as the concatenated gag/pol/env alignments.
    snames <- row.names(s)
    o <- order(snames)
    snames <- snames[o]
    s <- s[o, ]
    write.dna(
        s,
        paste0(pol_stub, ".fas"),
        format = "fasta",
        nbcol = -1,
        colsep = ""
    )
}