In [22]:
library(dplyr)
library(tidyr)
library(sqldf)
library(splitstackshape)
library(stringr)
library(compare)
In [23]:
# Work from the project data directory: every relative path used below
# (CSV inputs, .rda outputs and the sourced helper file) is resolved from it.
setwd("~/ODEX4all-UseCases/Bayer/data")
# Load the project helper functions from the sibling src directory.
# presumably this defines getConceptID(), getTraitEKPID(),
# getIndirectRelation(), getTableFromJson(), getConceptName() and
# getPubMedId(), none of which are defined in this file -- verify.
source("..//src/EuretosInfrastructure.R")
# A negative `warn` makes R ignore all warnings for the rest of the session.
# NOTE(review): this also hides warnings that signal real problems (e.g.
# recycling in cbind() or type coercion); consider removing it while debugging.
options(warn=-1)
In [24]:
# Read the gene information table; the first row holds the column names.
rice_genes <- read.csv(
  file = "GeneInformationTable_Qtaro_Selected.csv",
  header = TRUE
)
In [25]:
# Preview the first rows of the gene table.
head(rice_genes)
In [26]:
# Pass the locus ids to getConceptID() and keep only the EKP_Concept_Id
# column of the result; `start` is the gene-side input of the queries below.
start <- getConceptID(rice_genes[, "locus_id"])[, "EKP_Concept_Id"]
In [27]:
# Preview the first gene concept ids.
head(start)
In [28]:
# Trait identifiers to look up (presumably Trait Ontology accessions -- verify).
traits <- c(
  "TO:0000590", "TO:0000382", "TO:0000396", "TO:0000397",
  "TO:0000734", "TO:0000402", "TO:0002759", "TO:0000447"
)
In [29]:
# Look up every trait with getTraitEKPID() and stack the results into `end`.
# Each piece is the trait id bound in front of its lookup result. The pieces
# are built with lapply() and bound once, instead of growing `end` with
# rbind() on every iteration; seq_along() avoids the phantom indices that
# 1:length(traits) yields when `traits` is empty.
end <- do.call(
  rbind,
  lapply(seq_along(traits), function(i) {
    tmp <- getTraitEKPID(traits[i])
    cbind(traits[i], tmp)
  })
)
# Keep columns 2-4 (this drops column 1, the trait id bound in above) and
# give them descriptive names.
# NOTE(review): assumes getTraitEKPID() returns at least three columns, in
# the order trait id, EKP id, content name -- confirm against the helper.
end <- end[, c(2, 3, 4)]
colnames(end) <- c("TOid", "TOEKPid", "TOContentName")
head(end)
In [30]:
# Call getIndirectRelation() from the gene concept ids to the EKP ids held
# in rows 3, 7 and 8 of `end`, then store the result on disk.
# NOTE(review): the hard-coded row indices only select the intended traits
# if `end` has exactly one row per entry of `traits` -- confirm.
genes2Trait <- getIndirectRelation(
  start,
  end[c(3, 7, 8), "TOEKPid"]
)
save(list = "genes2Trait", file = "genes2Trait.rda")
In [31]:
# Read the table of neighbouring trait ids.
neig <- read.csv(
  "NeighbouringTraitEKPid.csv",
  stringsAsFactors = FALSE,
  header = TRUE
)
# Query the genes against the ids in column 2 of `neig`, the same column
# that serves as the target set of the Trait2TraitNeighbours query. Querying
# end[c(3, 7, 8), "TOEKPid"] here would only repeat the genes2Trait query
# and leave `neig` unused.
# NOTE(review): column 2 is assumed to hold the neighbouring trait EKP ids
# -- confirm against the CSV before relying on the saved result.
genes2TraitNeighbours <- getIndirectRelation(start, unique(neig[, 2]))
save(genes2TraitNeighbours, file = "genes2TraitNeighbours.rda")
In [32]:
# Call getIndirectRelation() between the distinct ids in column 1 and the
# distinct ids in column 2 of `neig`, then store the result on disk.
Trait2TraitNeighbours <- getIndirectRelation(
  unique(neig[[1]]),
  unique(neig[[2]])
)
save(list = "Trait2TraitNeighbours", file = "Trait2TraitNeighbours.rda")
In [33]:
# Call getIndirectRelation() from the gene concept ids to every EKP id in
# `end`, then store the result on disk.
# NOTE(review): the variable is named "Direct" but the call is
# getIndirectRelation() -- confirm which relation type is intended.
genes2TraitsDirect <- getIndirectRelation(
  start,
  end[, "TOEKPid"]
)
save(list = "genes2TraitsDirect", file = "genes2TraitsDirect.rda")
In [39]:
# Restore the saved query results.
load("genes2Trait.rda")
load("genes2TraitNeighbours.rda")
load("Traits_and_their_neighbours.rda")
load("genes2TraitsDirect.rda")

# Convert each result with getTableFromJson() and coerce it to a matrix so
# the four tables can be stacked row-wise.
genes2Trait <- genes2Trait %>%
  getTableFromJson() %>%
  as.matrix()
genes2TraitNeighbours <- genes2TraitNeighbours %>%
  getTableFromJson() %>%
  as.matrix()
# NOTE(review): `a` is not defined anywhere in this file; presumably it is
# the object stored inside Traits_and_their_neighbours.rda -- verify. Also
# note that Trait2TraitNeighbours.rda, which this file saves, is never loaded.
Traits_and_their_neighbours <- a %>%
  getTableFromJson() %>%
  as.matrix()
genes2TraitsDirect <- genes2TraitsDirect %>%
  getTableFromJson() %>%
  as.matrix()

# Stack all relations, drop duplicated rows and convert to a data frame.
dfs <- data.frame(unique(do.call(
  rbind,
  list(
    genes2Trait,
    genes2TraitNeighbours,
    Traits_and_their_neighbours,
    genes2TraitsDirect
  )
)))
In [44]:
# Preview the first rows of the combined relation table.
head(dfs)
In [36]:
# Build a readable triple table (subject, predicate, object, provenance,
# score) from `dfs` and write it out as CSV.

# Predicate lookup table: columns 2 and 3 of the reference list are kept and
# renamed to `pred` (the join key) and `names` (the label used in the output).
pred <- read.csv("Reference_Predicate_List.csv", header = TRUE)
pred <- pred[, c(2, 3)]
colnames(pred) <- c("pred", "names")

# Look up names for the subject and object ids and append the first column
# of each lookup result to dfs.
subject_name <- getConceptName(dfs[, "Subject"])
dfs <- cbind(dfs, subject_name[, 1])
object_name <- getConceptName(dfs[, "Object"])
dfs <- cbind(dfs, object_name[, 1])

# Attach the predicate label to every relation via a left join on the
# predicate id.
predicate_name <- sqldf(
  "select * from dfs left join pred on pred.pred=dfs.Predicate"
)
# The columns are combined by position below, so the join must return
# exactly one row per row of dfs. A duplicated predicate id in the reference
# list would fan rows out and misalign the table, so fail loudly instead.
if (nrow(predicate_name) != nrow(dfs)) {
  stop(
    "Predicate join returned ", nrow(predicate_name), " rows for ",
    nrow(dfs), " relations; check Reference_Predicate_List.csv for ",
    "duplicated predicate ids.",
    call. = FALSE
  )
}

pbs <- getPubMedId(dfs$Publications)
# NOTE(review): the five column names assigned below only fit if
# subject_name, object_name and pbs each contribute one column -- confirm.
tripleName <- cbind(
  subject_name,
  as.character(predicate_name[, "names"]),
  object_name,
  pbs,
  as.character(dfs[, "Score"])
)
colnames(tripleName) <- c(
  "Subject", "Predicate", "Object", "Provenance", "Score"
)
# qmethod = "double" writes embedded quotes as "" (standard CSV) instead of
# the backslash escape that write.table() uses by default.
write.table(
  tripleName,
  file = "~/ODEX4all-UseCases/Bayer/data/Results_Genes_Traits.csv",
  sep = ",",
  row.names = FALSE,
  qmethod = "double"
)
In [45]:
# Preview the first rows of the exported triple table.
head(tripleName)