notebook.community

Edit and run



In [3]:

    
#libraries
library(GO.db)
library(topGO)
library(org.Hs.eg.db)
library(org.Sc.sgd.db)
library(GOSemSim)









    



Loading required package: AnnotationDbi
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, cbind, colnames, do.call,
    duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect,
    is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
    paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
    Reduce, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit, which, which.max, which.min

Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: IRanges
Loading required package: S4Vectors

Attaching package: ‘S4Vectors’

The following objects are masked from ‘package:base’:

    colMeans, colSums, expand.grid, rowMeans, rowSums


Loading required package: graph
Loading required package: SparseM

Attaching package: ‘SparseM’

The following object is masked from ‘package:base’:

    backsolve


groupGOTerms: 	GOBPTerm, GOMFTerm, GOCCTerm environments built.

Attaching package: ‘topGO’

The following object is masked from ‘package:IRanges’:

    members



GOSemSim v2.0.4  For help: https://guangchuangyu.github.io/GOSemSim

If you use GOSemSim in published research, please cite:
Guangchuang Yu, Fei Li, Yide Qin, Xiaochen Bo, Yibo Wu, Shengqi Wang. GOSemSim: an R package for measuring semantic similarity among GO terms and gene products Bioinformatics 2010, 26(7):976-978



In [30]:

    
# ---- Run configuration ----
# Prefix shared by the input gene list and every output file.
file <- "yeast_union"

# GO sub-ontology used for all similarity computations below.
ont <- "BP"

# Organism annotation: the package object and its name as a string.
db <- org.Sc.sgd.db
mapping <- "org.Sc.sgd.db"

# Key type under which the gene identifiers are looked up in the annotation.
ID <- "ENSEMBL"



In [31]:

    
# Work from the project directory; every relative file name used in this
# notebook (inputs and outputs) is resolved against it.
setwd("/home/david/Documents/ghsom/")

# Background gene list: whitespace-separated identifiers read as a
# character vector.
backgroundFilename <- sprintf("%s_all_genes.txt", file)
allGenes <- scan(file = backgroundFilename, what = character())



In [7]:

    
# Build the GOSemSim annotation object for the configured organism database,
# ontology and key type; it is passed as `semData` to the similarity calls.
scGO <- godata(
    mapping,
    ont = ont,
    keytype = ID
)









    



[1] "preparing gene to GO mapping data..."
[1] "preparing IC data..."



In [12]:

    
# Pairwise gene similarity over the background list (Resnik measure, BMA
# combination of term scores). Despite the variable name, the values held
# here are similarities until they are flipped with `1 - x`.
semanticDistances <- mgeneSim(
    allGenes,
    semData = scGO,
    measure = "Resnik",
    combine = "BMA",
    verbose = FALSE
)



In [14]:

    
# Inspect how many columns (genes) the matrix ended up with.
ncol(semanticDistances)



In [10]:

    
# Flip every entry x to 1 - x, overwriting the matrix in place.
# NOTE: not idempotent -- running this cell a second time flips the values
# back, so run it exactly once per computed matrix.
semanticDistances <- 1 - semanticDistances



In [11]:

    
# Preview the first rows of the matrix.
head(semanticDistances)









    





YGR046W YBR057C YJR122W YDR503C YGR136W YPR048W YGL175C YOR262W YDL246C YNL122C ⋯ YGR024C YGL116W YJL057C YOL082W YJL110C YFR002W YGL025C YHR129C YOR078W YGL153W

	YGR046W 0.000 0.704 0.814 0.367 0.949 0.769 0.751 0.882 0.786 0.672 ⋯    0.850 0.865 0.748 0.807 0.826 0.919 0.832 0.916 0.878 0.906
	YBR057C 0.704 0.000 0.784 0.719 0.782 0.788 0.487 0.771 0.788 0.554 ⋯    0.381 0.685 0.652 0.787 0.769 0.845 0.743 0.805 0.720 0.736
	YJR122W 0.814 0.784 0.000 0.809 0.845 0.000 0.784 0.809 0.845 0.754 ⋯    0.864 0.862 0.836 0.608 0.834 0.694 0.689 0.856 0.779 0.862
	YDR503C 0.367 0.719 0.809 0.000 0.812 0.738 0.667 0.851 0.643 0.681 ⋯    0.756 0.801 0.577 0.787 0.746 0.888 0.843 0.876 0.817 0.842
	YGR136W 0.949 0.782 0.845 0.812 0.000 0.845 0.795 0.847 0.960 0.843 ⋯    0.969 0.760 0.966 0.829 0.890 0.676 0.860 0.845 0.942 0.877
	YPR048W 0.769 0.788 0.000 0.738 0.845 0.000 0.778 0.889 0.560 0.678 ⋯    0.766 0.841 0.824 0.799 0.786 0.838 0.720 0.903 0.840 0.862



In [ ]:

    
# Pairwise Wang/BMA gene similarity for every ordered pair in allGenes,
# returned as a numeric length(allGenes) x length(allGenes) matrix with gene
# identifiers as dimnames.
#
# geneSim() returns a list (with a `geneSim` score element) when both genes
# have GO annotations and a bare NA otherwise. Indexing the result with
# single brackets kept the list wrapper, so the nested sapply() produced a
# list-mode matrix. Here the scalar score is extracted explicitly and
# vapply() enforces a numeric result of the expected shape.
# Unannotated pairs stay NA so they can be handled afterwards.
semanticDistances2 <- vapply(allGenes, function(i) {
    vapply(allGenes, function(j) {
        res <- geneSim(i, j, semData = scGO, measure = "Wang", combine = "BMA")
        if (is.list(res)) as.numeric(res[["geneSim"]]) else NA_real_
    }, numeric(1))
}, numeric(length(allGenes)))



In [ ]:

    
# Number of genes in the background list.
length(allGenes)



In [ ]:

    
# Row count of the pairwise matrix, for comparison with length(allGenes).
nrow(semanticDistances2)



In [ ]:

    
# Replace missing entries with 0.
semanticDistances2[is.na(semanticDistances2)] <- 0



In [ ]:

    
# Save the Wang similarity matrix as a bare CSV (no row or column labels).
# The matrix written is semanticDistances2, the Wang-measure result whose NAs
# were just filled; the previous code wrote `semanticDistances`, which holds
# the Resnik-based values and does not match the "_wang_similarity" file name.
write.table(
    semanticDistances2,
    sep = ",",
    file = sprintf("%s_wang_similarity.csv", file),
    row.names = FALSE,
    col.names = FALSE
)



In [15]:

    
library(GOSim)

# Point GOSim at the configured ontology without loading IC data.
# NOTE(review): the later getGeneSim() call uses similarityTerm="relevance";
# confirm that measure has the IC values it needs with loadIC=FALSE.
setOntology(ont, loadIC = FALSE)

# Accept every evidence code and use the yeast GO mapping.
setEvidenceLevel(
    evidences = "all",
    organism = org.Sc.sgdORGANISM,
    gomap = org.Sc.sgdGO
)

# Quick look at the gene identifiers about to be compared.
head(allGenes)









    



Loading required package: annotate
Loading required package: XML

Attaching package: ‘XML’

The following object is masked from ‘package:graph’:

    addNode



In [33]:

    
# Pairwise gene distances from GOSim: 1 - similarity, using funSimMax over
# relevance term similarities with normalization enabled.
# Despite the name, allGeneSims holds distances from here on.
allGeneSims <- 1 - getGeneSim(allGenes, similarity="funSimMax", 
                          similarityTerm="relevance", normalization = TRUE)

# Pairs with no computable similarity are treated as maximally distant.
allGeneSims[is.na(allGeneSims)] <- 1

# getGeneSim() warns that funSimMax can return similarities above 1, which
# would make 1 - similarity negative. Clamp at 0 so every entry is a valid
# distance.
allGeneSims[allGeneSims < 0] <- 0









    



filtering out genes not mapping to the currently set GO category ... ===> list of  1647 genes reduced to  1529 






    



Warning message in getGeneSim(allGenes, similarity = "funSimMax", similarityTerm = "relevance", :
“Similarity matrix contains values > 1! This may happen with simlarity='funSimMax', if one gene's GO annotation is a complete subset of another gene's GO annotation.”



In [36]:

    
# Preview the first rows of the GOSim-based matrix.
head(allGeneSims)









    





YLR268W YJL155C YBR255W YFR027W YJR122W YLR244C YPL144W YBR135W YBR160W YDL238C ⋯ YDR076W YDR508C YPR028W YMR117C YLR096W YCR067C YMR047C YML055W YGL015C YPL013C

	YLR268W 0.0000000 0.9938983 1         0.8118516 0.7318703 1.0000000 0.7339676 0.97257857 0.27822262 0.9833591 ⋯         0.8929767 0.175877  0.2725629 0.8150920 0.32610054 0.1846623 0.2633827 0.5411890 1         0.79901481
	YJL155C 0.9938983 0.0000000 1         0.4416787 0.8477656 0.1645404 0.9849700 0.40300985 0.40931441 0.5005958 ⋯         0.4386883 1.000000  0.9874740 0.9814483 0.38235845 1.0000000 0.5504462 0.5971853 1         0.28307242
	YBR255W 1.0000000 1.0000000 0         1.0000000 1.0000000 1.0000000 1.0000000 1.00000000 1.00000000 1.0000000 ⋯         1.0000000 1.000000  1.0000000 1.0000000 1.00000000 1.0000000 1.0000000 1.0000000 1         1.00000000
	YFR027W 0.8118516 0.4416787 1         0.0000000 0.6558710 0.1292241 0.6593126 0.15098605 0.11972270 0.3179332 ⋯         0.2183519 1.000000  0.5952518 0.3655197 0.53420515 0.9757161 0.5486758 0.7136951 1         0.26000288
	YJR122W 0.7318703 0.8477656 1         0.6558710 0.0000000 0.8896328 0.5773167 0.71418507 0.59620407 0.9044549 ⋯         0.5980483 1.000000  0.6167872 0.5772192 0.85076796 1.0000000 0.7613731 0.8959624 1         0.71567447
	YLR244C 1.0000000 0.1645404 1         0.1292241 0.8896328 0.0000000 1.0000000 0.05909364 0.06133358 0.4717284 ⋯         0.1245602 1.000000  1.0000000 1.0000000 0.02449147 0.9400956 0.3237669 0.3793700 1         0.06517584



In [37]:

    
# Save the GOSim-based matrix as CSV. Row labels (gene identifiers) are
# written and the header row is omitted.
write.table(
    allGeneSims,
    file = sprintf("%s_rel_similarity_GOSim.csv", file),
    sep = ",",
    row.names = TRUE,
    col.names = FALSE
)

	YGR046W	YBR057C	YJR122W	YDR503C	YGR136W	YPR048W	YGL175C	YOR262W	YDL246C	YNL122C	⋯	YGR024C	YGL116W	YJL057C	YOL082W	YJL110C	YFR002W	YGL025C	YHR129C	YOR078W	YGL153W
YGR046W	0.000	0.704	0.814	0.367	0.949	0.769	0.751	0.882	0.786	0.672	⋯	0.850	0.865	0.748	0.807	0.826	0.919	0.832	0.916	0.878	0.906
YBR057C	0.704	0.000	0.784	0.719	0.782	0.788	0.487	0.771	0.788	0.554	⋯	0.381	0.685	0.652	0.787	0.769	0.845	0.743	0.805	0.720	0.736
YJR122W	0.814	0.784	0.000	0.809	0.845	0.000	0.784	0.809	0.845	0.754	⋯	0.864	0.862	0.836	0.608	0.834	0.694	0.689	0.856	0.779	0.862
YDR503C	0.367	0.719	0.809	0.000	0.812	0.738	0.667	0.851	0.643	0.681	⋯	0.756	0.801	0.577	0.787	0.746	0.888	0.843	0.876	0.817	0.842
YGR136W	0.949	0.782	0.845	0.812	0.000	0.845	0.795	0.847	0.960	0.843	⋯	0.969	0.760	0.966	0.829	0.890	0.676	0.860	0.845	0.942	0.877
YPR048W	0.769	0.788	0.000	0.738	0.845	0.000	0.778	0.889	0.560	0.678	⋯	0.766	0.841	0.824	0.799	0.786	0.838	0.720	0.903	0.840	0.862

	YLR268W	YJL155C	YBR255W	YFR027W	YJR122W	YLR244C	YPL144W	YBR135W	YBR160W	YDL238C	⋯	YDR076W	YDR508C	YPR028W	YMR117C	YLR096W	YCR067C	YMR047C	YML055W	YGL015C	YPL013C
YLR268W	0.0000000	0.9938983	1	0.8118516	0.7318703	1.0000000	0.7339676	0.97257857	0.27822262	0.9833591	⋯	0.8929767	0.175877	0.2725629	0.8150920	0.32610054	0.1846623	0.2633827	0.5411890	1	0.79901481
YJL155C	0.9938983	0.0000000	1	0.4416787	0.8477656	0.1645404	0.9849700	0.40300985	0.40931441	0.5005958	⋯	0.4386883	1.000000	0.9874740	0.9814483	0.38235845	1.0000000	0.5504462	0.5971853	1	0.28307242
YBR255W	1.0000000	1.0000000	0	1.0000000	1.0000000	1.0000000	1.0000000	1.00000000	1.00000000	1.0000000	⋯	1.0000000	1.000000	1.0000000	1.0000000	1.00000000	1.0000000	1.0000000	1.0000000	1	1.00000000
YFR027W	0.8118516	0.4416787	1	0.0000000	0.6558710	0.1292241	0.6593126	0.15098605	0.11972270	0.3179332	⋯	0.2183519	1.000000	0.5952518	0.3655197	0.53420515	0.9757161	0.5486758	0.7136951	1	0.26000288
YJR122W	0.7318703	0.8477656	1	0.6558710	0.0000000	0.8896328	0.5773167	0.71418507	0.59620407	0.9044549	⋯	0.5980483	1.000000	0.6167872	0.5772192	0.85076796	1.0000000	0.7613731	0.8959624	1	0.71567447
YLR244C	1.0000000	0.1645404	1	0.1292241	0.8896328	0.0000000	1.0000000	0.05909364	0.06133358	0.4717284	⋯	0.1245602	1.000000	1.0000000	1.0000000	0.02449147	0.9400956	0.3237669	0.3793700	1	0.06517584