In [1]:
library(dplyr)
library(tidyr)
library(sqldf)
library(splitstackshape)
library(stringr)
library(compare)
# Switch to the project's src directory so that the relative paths used
# further down (sourced script, input tables, .rda caches, CSV outputs)
# resolve against it.
# NOTE(review): this changes the working directory for the whole session.
setwd("../src")
In [2]:
# Load the project helper script.
# NOTE(review): the helpers called below (getConceptID, getIndirectRelation,
# getTableFromJson, getConceptName, getPubMedId, ...) are presumably defined
# in this file -- confirm.
source("EuretosInfrastructure.R")
# A negative `warn` level makes R ignore every warning from here on.
# NOTE(review): this also hides coercion / recycling warnings raised by the
# later cells; consider removing it while debugging.
options(warn=-1)
In [3]:
# Tab-separated gene list with a header row.
yeast_genes <- read.delim("20170119_GeneList_DSM.txt")
In [4]:
# Look up the lower-cased SGD identifiers and keep only the
# EKP_Concept_Id column of the lookup result.
start <- getConceptID(
  tolower(as.character(yeast_genes[, "SGD_ID"]))
)[, "EKP_Concept_Id"]
In [5]:
# Preview the first few start concept IDs.
head(start)
In [6]:
# EKP ID of "resistance to chemicals": flatten the helper's result and keep
# the element named "content.id".
end <- unlist(getResistanceEKPID())["content.id"]
In [7]:
# Show the selected "content.id" entry for resistance to chemicals.
head(end)
In [8]:
# EKP ID of butanol: flatten the helper's result and keep the element
# named "content.id".
end2 <- unlist(getButanolID())["content.id"]
In [9]:
# Show the selected "content.id" entry for butanol.
head(end2)
In [ ]:
# Query the relations between the start genes and the
# "resistance to chemicals" concept.
# NOTE(review): this cell has no execution count and its result is loaded
# from resistance2Chemicals.rda further down -- presumably the query is slow
# and only re-run when the cache must be refreshed; confirm.
resistance2Chemicals<-getIndirectRelation(start,end)
In [ ]:
# Cache the query result on disk under its own variable name.
save(list = "resistance2Chemicals", file = "resistance2Chemicals.rda")
In [10]:
# Restore `resistance2Chemicals` from the on-disk cache.
load("resistance2Chemicals.rda")
In [ ]:
# Query the relations between the start genes and the butanol concept.
# NOTE(review): this cell has no execution count and its result is loaded
# from resistance2Butanol.rda further down -- presumably the query is slow
# and only re-run when the cache must be refreshed; confirm.
resistance2Butanol<-getIndirectRelation(start,end2)
In [ ]:
# Cache the query result on disk under its own variable name.
save(list = "resistance2Butanol", file = "resistance2Butanol.rda")
In [12]:
# Restore `resistance2Butanol` from the on-disk cache.
load("resistance2Butanol.rda")
In [13]:
# Tabulate the chemicals-resistance relations, then round-trip through a
# matrix so the resulting data frame holds plain (non-factor) columns.
dfs1 <- getTableFromJson(resistance2Chemicals)
dfs1 <- data.frame(as.matrix(dfs1), stringsAsFactors = FALSE)
In [14]:
# Tabulate the butanol relations, then round-trip through a matrix so the
# resulting data frame holds plain (non-factor) columns.
dfs2 <- getTableFromJson(resistance2Butanol)
dfs2 <- data.frame(as.matrix(dfs2), stringsAsFactors = FALSE)
In [15]:
# Compare the two relation tables with every transformation allowed and
# keep the `tM` element of the comparison result as the working table.
# NOTE(review): in the compare package `tM` is presumably the transformed
# *model* (first argument), not a merge of both tables -- confirm this is
# the table intended for the steps below.
comparison <- compare(dfs1, dfs2, allowAll = TRUE)
dfs <- comparison[["tM"]]
In [16]:
# Preview the first rows of the working relation table.
head(dfs)
In [17]:
# Row and column count of the working relation table.
dim(dfs)
In [18]:
# Build a human-readable triple table (subject, predicate, object,
# provenance, score) from the relation table `dfs`.

# Predicate lookup table: keep columns 2 and 3 of the reference list and
# rename them to "pred" (join key) and "names" (label).
pred<-read.csv("Reference_Predicate_List.csv",header=TRUE)
pred<-pred[,c(2,3)]
colnames(pred)<-c("pred","names")
# Resolve the subject concept IDs and append the second column of the
# lookup result to dfs.
subject_name<-getConceptName(dfs[,"Subject"])
dfs<-cbind(dfs,subject_name[,2])
# Same for the object concept IDs.
object_name<-getConceptName(dfs[,"Object"])
dfs<-cbind(dfs,object_name[,2])
# Attach the predicate label to every relation through a left join.
# NOTE(review): the cbind below relies on this join returning exactly one
# row per dfs row, in dfs order -- confirm that pred.pred is unique.
predicate_name<-sqldf('select * from dfs left join pred on pred.pred=dfs.Predicate')
# Publication lookup for every relation (return shape not visible here).
pbs<-getPubMedId(dfs$Publications)
# Bind subject name, predicate label, object name, publication info and score.
tripleName<-cbind(subject_name[,"name"],as.character(predicate_name[,"names"]),object_name[,"name"],pbs,dfs[,"Score"])
# Drop column 4.
# NOTE(review): presumably pbs contributes two columns (4 and 5) and only
# the second one is wanted as provenance -- confirm.
tripleName<-tripleName[,c(1,2,3,5,6)]
colnames(tripleName)<-c("Subject","Predicate","Object","Provenance","Score")
In [20]:
# Preview the first rows of the named triple table.
head(tripleName)
In [21]:
# Row and column count of the named triple table.
dim(tripleName)
In [22]:
# Export the named triples as a semicolon-separated file without row names.
write.table(
  tripleName,
  "./triple.csv",
  row.names = FALSE,
  sep = ";"
)
In [23]:
# Summarise the start genes that are directly linked to butanol and write
# the result to RepresentationSummary.csv.

# Keep the relations whose subject is one of the start genes.
# `%in%` is a set-membership test. `Subject == start` would compare the two
# vectors position by position (recycling `start`), which selects the wrong
# rows or fails when the lengths differ.
gr2c <- dplyr::filter(dfs1, Subject %in% start) ## genes involved in resistance to chemicals
gr2b <- dplyr::filter(dfs2, Subject %in% start) ## genes involved in the butanol relations
# First-column values present in both filtered tables.
interRC_RB <- intersect(gr2c[, 1], gr2b[, 1])
### Genes pre9,nkp2,snt307,rtg1

# Names of the genes linked to butanol.
DSM_Genes <- getConceptName(gr2b[, "Subject"])
# Predicate label for every butanol relation (left join on the predicate ID).
# NOTE(review): row alignment with gr2b relies on the join returning exactly
# one row per gr2b row, in order -- confirm that pred.pred is unique.
relationship <- sqldf('select * from gr2b left join pred on pred.pred=gr2b.Predicate')
relationship <- relationship$names
pubmedID <- getPubMedId(gr2b$Publications)

# Bind every piece once (`relationship` used to be bound twice, leaving a
# duplicated column name). The unnamed `gr2b$Score` argument gets the
# column name "gr2b$Score", which the selection below relies on.
represent <- cbind(DSM_Genes, gr2b$Score, relationship, pubmedID)
represent <- represent[, c("name", "gr2b$Score", "relationship", "V2")]
names(represent) <- c(
  "DSMGenes",
  "AssociationScoreButanol",
  "RelationshipBtwGenesButanol",
  "Publications"
)
write.csv(represent, file = "RepresentationSummary.csv")
In [ ]: