notebook.community

Edit and run



In [1]:

    
#libraries
library(GO.db)
library(topGO)
# library(org.Hs.eg.db)
library(org.Sc.sgd.db)
library(GOSemSim)









    



Loading required package: AnnotationDbi
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, cbind, colnames, do.call,
    duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect,
    is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
    paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
    Reduce, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit, which, which.max, which.min

Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: IRanges
Loading required package: S4Vectors

Attaching package: ‘S4Vectors’

The following objects are masked from ‘package:base’:

    colMeans, colSums, expand.grid, rowMeans, rowSums


Loading required package: graph
Loading required package: SparseM

Attaching package: ‘SparseM’

The following object is masked from ‘package:base’:

    backsolve


groupGOTerms: 	GOBPTerm, GOMFTerm, GOCCTerm environments built.

Attaching package: ‘topGO’

The following object is masked from ‘package:IRanges’:

    members


GOSemSim v2.0.4  For help: https://guangchuangyu.github.io/GOSemSim

If you use GOSemSim in published research, please cite:
Guangchuang Yu, Fei Li, Yide Qin, Xiaochen Bo, Yibo Wu, Shengqi Wang. GOSemSim: an R package for measuring semantic similarity among GO terms and gene products Bioinformatics 2010, 26(7):976-978



In [2]:

    
# ---- Run configuration ----
# Dataset name and the two run parameters; together they name the community
# directory <file>_communities_<p>_<init> entered below.
file <- "yeast_uetz"
p <- 0.8
init <- 1

# Ontology code handed to godata() / topGO in later cells.
ont <- "BP"

# Organism annotation package (object and name) and the gene identifier type.
mapping <- "org.Sc.sgd.db"
db <- org.Sc.sgd.db
ID <- "ENSEMBL"
# Alternative annotation settings kept for reference:
# db <- org.Hs.eg.db
# mapping <- "org.Hs.eg.db"
# ID <- "ENTREZ"

# ---- Load all community gene lists ----
# Every file below (and the "../" paths used in later cells) is resolved
# relative to this directory.
setwd(sprintf("/home/david/Documents/ghsom/%s_communities_%s_%s", file, p, init))

# Background gene list: one whitespace-separated token per gene.
backgroundFilename <- "all_genes.txt"
allGenes <- scan(file = backgroundFilename, what = character())

# Read community_0.txt, community_1.txt, ... until the first missing file.
# File k is stored in g[[k + 1]]; afterwards numCom is the number of files read
# and filename names the first file that was not found.
g <- list()
numCom <- 0
repeat {
    filename <- sprintf("community_%s.txt", numCom)
    if (!file.exists(filename)) {
        break
    }
    g[[numCom + 1]] <- scan(file = filename, what = character())
    numCom <- numCom + 1
}

# Distances between neurons, stored as a header-less comma-separated table.
shortest.path <- read.csv(file = "shortest_path.csv", header = FALSE, sep = ",")



In [3]:

    
# Display how many community files the loading loop read.
numCom



In [4]:

    
##SEMANTIC SIMILARITY
#construct the GOSemSim annotation object (scGO); the mgeneSim / mgoSim /
#clusterSim / mclusterSim cells below receive it as semData

scGO <- godata(mapping, ont=ont, keytype=ID)
#progress marker, printed once godata() has returned
print("DONE")









    



[1] "preparing gene to GO mapping data..."
[1] "preparing IC data..."
[1] "DONE"



In [64]:

    
# Translate the integer gene indices read from the community files into gene
# names by indexing into the master name table.
# NOTE(review): this path points at the 0.5 run while the configuration cell
# sets p <- 0.8 -- confirm that the name table is meant to be shared.
# NOTE(review): not re-runnable as-is: once g / allGenes hold names,
# as.integer() on them yields NA.
allGeneNames <- scan(file="../yeast_uetz_communities_0.5_1/all_genes.txt", what=character())

# lapply (not sapply) so g is always a list with one character vector per
# community; sapply would collapse it into a matrix whenever every community
# happens to have the same size, which breaks the g[[i]] accesses used later.
g  <- lapply(g, function(i) allGeneNames[as.integer(i)])
allGenes <- allGeneNames[as.integer(allGenes)]



In [5]:

    
# 0/1 factor over the whole gene universe, labelled by gene: 1 marks the
# members of the first community.
interestingGenes <- setNames(factor(as.integer(allGenes %in% g[[1]])), allGenes)

# topGOdata object for the first community, annotated through annFUN.org;
# nodeSize = 10 is passed through to topGO unchanged.
GOdata <- new("topGOdata",
              description = "topGO object",
              ontology = ont,
              allGenes = interestingGenes,
              nodeSize = 10,
              annotationFun = annFUN.org,
              mapping = mapping,
              ID = ID)









    



Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )



In [9]:

    
# List the GO term identifiers held by the topGOdata object.
usedGO(GOdata)









    





	'GO:0000003'
	'GO:0000070'
	'GO:0000075'
	'GO:0000278'
	'GO:0000280'
	'GO:0000288'
	'GO:0000723'
	'GO:0000819'
	'GO:0000956'
	'GO:0005975'
	'GO:0006082'
	'GO:0006091'
	'GO:0006139'
	'GO:0006259'
	'GO:0006281'
	'GO:0006302'
	'GO:0006310'
	'GO:0006325'
	'GO:0006351'
	'GO:0006352'
	'GO:0006355'
	'GO:0006357'
	'GO:0006366'
	'GO:0006367'
	'GO:0006396'
	'GO:0006397'
	'GO:0006401'
	'GO:0006402'
	'GO:0006403'
	'GO:0006412'
	'GO:0006417'
	'GO:0006461'
	'GO:0006464'
	'GO:0006468'
	'GO:0006508'
	'GO:0006518'
	'GO:0006520'
	'GO:0006605'
	'GO:0006606'
	'GO:0006725'
	'GO:0006753'
	'GO:0006793'
	'GO:0006796'
	'GO:0006807'
	'GO:0006810'
	'GO:0006886'
	'GO:0006913'
	'GO:0006914'
	'GO:0006950'
	'GO:0006974'
	'GO:0006996'
	'GO:0006997'
	'GO:0007010'
	'GO:0007033'
	'GO:0007034'
	'GO:0007049'
	'GO:0007059'
	'GO:0007067'
	'GO:0007126'
	'GO:0007154'
	'GO:0007165'
	'GO:0007346'
	'GO:0008104'
	'GO:0008150'
	'GO:0008152'
	'GO:0009056'
	'GO:0009057'
	'GO:0009058'
	'GO:0009059'
	'GO:0009116'
	'GO:0009117'
	'GO:0009123'
	'GO:0009161'
	'GO:0009259'
	'GO:0009628'
	'GO:0009653'
	'GO:0009889'
	'GO:0009890'
	'GO:0009891'
	'GO:0009892'
	'GO:0009893'
	'GO:0009987'
	'GO:0010033'
	'GO:0010467'
	'GO:0010468'
	'GO:0010556'
	'GO:0010557'
	'GO:0010558'
	'GO:0010564'
	'GO:0010604'
	'GO:0010605'
	'GO:0010608'
	'GO:0010628'
	'GO:0010629'
	'GO:0010639'
	'GO:0015031'
	'GO:0015931'
	'GO:0016043'
	'GO:0016070'
	'GO:0016071'
	'GO:0016192'
	'GO:0016236'
	'GO:0016310'
	'GO:0016569'
	'GO:0017038'
	'GO:0018130'
	'GO:0018193'
	'GO:0019219'
	'GO:0019222'
	'GO:0019438'
	'GO:0019439'
	'GO:0019538'
	'GO:0019637'
	'GO:0019693'
	'GO:0019752'
	'GO:0022402'
	'GO:0022414'
	'GO:0022607'
	'GO:0022613'
	'GO:0023052'
	'GO:0030154'
	'GO:0030163'
	'GO:0031323'
	'GO:0031324'
	'GO:0031325'
	'GO:0031326'
	'GO:0031327'
	'GO:0031328'
	'GO:0032200'
	'GO:0032268'
	'GO:0032502'
	'GO:0032774'
	'GO:0033036'
	'GO:0033043'
	'GO:0033365'
	'GO:0033554'
	'GO:0034248'
	'GO:0034504'
	'GO:0034613'
	'GO:0034622'
	'GO:0034641'
	'GO:0034645'
	'GO:0034654'
	'GO:0034655'
	'GO:0034660'
	'GO:0035556'
	'GO:0036211'
	'GO:0040007'
	'GO:0042221'
	'GO:0042254'
	'GO:0042592'
	'GO:0043043'
	'GO:0043170'
	'GO:0043412'
	'GO:0043436'
	'GO:0043603'
	'GO:0043604'
	'GO:0043623'
	'GO:0043632'
	'GO:0043933'
	'GO:0044085'
	'GO:0044087'
	'GO:0044237'
	'GO:0044238'
	'GO:0044248'
	'GO:0044249'
	'GO:0044257'
	'GO:0044260'
	'GO:0044265'
	'GO:0044267'
	'GO:0044270'
	'GO:0044271'
	'GO:0044281'
	'GO:0044283'
	'GO:0044699'
	'GO:0044700'
	'GO:0044702'
	'GO:0044710'
	'GO:0044711'
	'GO:0044712'
	'GO:0044723'
	'GO:0044744'
	'GO:0044763'
	'GO:0044765'
	'GO:0044767'
	'GO:0044770'
	'GO:0044772'
	'GO:0044802'
	'GO:0045184'
	'GO:0045786'
	'GO:0045892'
	'GO:0045893'
	'GO:0045934'
	'GO:0045935'
	'GO:0045944'
	'GO:0046483'
	'GO:0046700'
	'GO:0046907'
	'GO:0048285'
	'GO:0048518'
	'GO:0048519'
	'GO:0048522'
	'GO:0048523'
	'GO:0048583'
	'GO:0048856'
	'GO:0048869'
	'GO:0050657'
	'GO:0050658'
	'GO:0050789'
	'GO:0050794'
	'GO:0050896'
	'GO:0051028'
	'GO:0051128'
	'GO:0051129'
	'GO:0051169'
	'GO:0051170'
	'GO:0051171'
	'GO:0051172'
	'GO:0051173'
	'GO:0051179'
	'GO:0051186'
	'GO:0051234'
	'GO:0051236'
	'GO:0051246'
	'GO:0051252'
	'GO:0051253'
	'GO:0051254'
	'GO:0051276'
	'GO:0051301'
	'GO:0051321'
	'GO:0051641'
	'GO:0051649'
	'GO:0051716'
	'GO:0051726'
	'GO:0051783'
	'GO:0055086'
	'GO:0055114'
	'GO:0060249'
	'GO:0060255'
	'GO:0061024'
	'GO:0065003'
	'GO:0065004'
	'GO:0065007'
	'GO:0065008'
	'GO:0065009'
	'GO:0070271'
	'GO:0070727'
	'GO:0070887'
	'GO:0071702'
	'GO:0071704'
	'GO:0071705'
	'GO:0071822'
	'GO:0071824'
	'GO:0071840'
	'GO:0072594'
	'GO:0080090'
	'GO:0090304'
	'GO:0090305'
	'GO:0090407'
	'GO:0097659'
	'GO:0098813'
	'GO:1901135'
	'GO:1901360'
	'GO:1901361'
	'GO:1901362'
	'GO:1901564'
	'GO:1901566'
	'GO:1901575'
	'GO:1901576'
	'GO:1901605'
	'GO:1901657'
	'GO:1901987'
	'GO:1902578'
	'GO:1902580'
	'GO:1902582'
	'GO:1902589'
	'GO:1902593'
	'GO:1902679'
	'GO:1902680'
	'GO:1903046'
	'GO:1903047'
	'GO:1903506'
	'GO:1903507'
	'GO:1903508'
	'GO:2000112'
	'GO:2000113'
	'GO:2001141'



In [16]:

    
# Manual Fisher test for a single GO term, using the GOdata object that is
# currently in the session.
goID <- "GO:0006914"
# Gene sets taken from GOdata: the full universe, the genes annotated to goID,
# and the genes flagged as significant.
gene.universe <- genes(GOdata)
go.genes <- genesInTerm(GOdata, goID)[[1]]
sig.genes <- sigGenes(GOdata)

# classicCount group wrapping the three gene sets, with GOFisherTest as the
# test statistic.
my.group <- new("classicCount", testStatistic = GOFisherTest, name = "fisher",
                 allMembers = gene.universe, groupMembers = go.genes,
                 sigMembers = sig.genes)
# Contingency table of the group.
# NOTE(review): the variable name `t` shadows base::t(); calls to t() still
# resolve to the function, but a more descriptive name would read better.
t <- contTable(my.group)

# Render the contingency table as a graphic.
library(gridExtra)
grid.table(t)

# Run the test on the group; the recorded output is a single number.
runTest(my.group)









    




0.000111024375808973



In [13]:

    
#' Find the GO terms enriched in one gene set, using topGO.
#'
#' Builds a topGOdata object in which the members of `genes` are flagged as
#' interesting within the universe `allGenes`, runs a Fisher test with the
#' requested algorithm, and keeps the terms whose score passes `cutoff`.
#' It also calls showSigOfNodes() on the full score vector.
#'
#' @param genes Vector of genes of interest (matched against `allGenes`).
#' @param allGenes Vector with the gene universe; also used as the names of
#'   the 0/1 factor given to topGO.
#' @param cutoff Terms are kept when their score (adjusted or raw, see
#'   `correction`) is <= `cutoff`.
#' @param correction If TRUE the Benjamini-Hochberg adjusted scores are
#'   compared with `cutoff`; otherwise the raw scores are.
#' @param ont Ontology passed to the topGOdata constructor.
#' @param mapping,ID Passed through to annFUN.org.
#' @param algorithm Algorithm name passed to runTest(). Defaults to "classic"
#'   so that calls which do not name an algorithm still run.
#' @return An unnamed list of three elements: the topGOdata object, the named
#'   vector of retained (raw) scores, and the value of showSigOfNodes().
enrichedGOTerms <- function(genes, allGenes, cutoff, correction, ont, mapping, ID, algorithm = "classic"){
    interestingGenes <- factor(as.integer(allGenes %in% genes))
    names(interestingGenes) <- allGenes
    
    GOdata <- new("topGOdata", description = "topGO object",
              ontology = ont, allGenes = interestingGenes,
              annotationFun = annFUN.org, mapping = mapping, 
              ID = ID, nodeSize = 10)
    
    result <- runTest(GOdata, algorithm = algorithm, statistic = "fisher")
    # Extract the scores once; they are used for filtering and for the plot.
    scores <- score(result)
    if (correction){
        # The adjusted values only decide which terms are kept; the returned
        # values are still the raw scores.
        GOs <- scores[which(p.adjust(scores, method="BH") <= cutoff)]
    } else {
        GOs <- scores[scores <= cutoff]
    }
    
    # Named sigPlot rather than plot so the local does not shadow graphics::plot.
    sigPlot <- showSigOfNodes(GOdata, scores, firstSigNodes = 10, useInfo ='all', swPlot = FALSE)
    
    return(list(GOdata, GOs, sigPlot))
}



In [14]:

    
# Run the enrichment for every community with the "elim" algorithm, keeping
# terms with an unadjusted score <= 0.01.
# enrichedGOTerms returns a three-element list for each community; later cells
# index the result as a matrix (enrichedGOs[[2,1]]), i.e. they rely on sapply
# arranging those lists with one column per community.
enrichedGOs  <- sapply(g, enrichedGOTerms, allGenes=allGenes, 
                      cutoff=0.01, correction=FALSE, ont=ont, mapping=mapping, ID=ID, algorithm="elim")









    



Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 252 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 13:	1 nodes to be scored	(0 eliminated genes)

	 Level 12:	4 nodes to be scored	(0 eliminated genes)

	 Level 11:	5 nodes to be scored	(0 eliminated genes)

	 Level 10:	5 nodes to be scored	(0 eliminated genes)

	 Level 9:	10 nodes to be scored	(0 eliminated genes)

	 Level 8:	13 nodes to be scored	(0 eliminated genes)

	 Level 7:	33 nodes to be scored	(20 eliminated genes)

	 Level 6:	46 nodes to be scored	(21 eliminated genes)

	 Level 5:	53 nodes to be scored	(28 eliminated genes)

	 Level 4:	44 nodes to be scored	(46 eliminated genes)

	 Level 3:	26 nodes to be scored	(46 eliminated genes)

	 Level 2:	11 nodes to be scored	(50 eliminated genes)

	 Level 1:	1 nodes to be scored	(50 eliminated genes)
Loading required package: Rgraphviz
Loading required package: grid

Attaching package: ‘grid’

The following object is masked from ‘package:topGO’:

    depth


Attaching package: ‘Rgraphviz’

The following objects are masked from ‘package:IRanges’:

    from, to

The following objects are masked from ‘package:S4Vectors’:

    from, to


Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 275 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 13:	1 nodes to be scored	(0 eliminated genes)

	 Level 12:	4 nodes to be scored	(0 eliminated genes)

	 Level 11:	6 nodes to be scored	(0 eliminated genes)

	 Level 10:	6 nodes to be scored	(0 eliminated genes)

	 Level 9:	12 nodes to be scored	(0 eliminated genes)

	 Level 8:	19 nodes to be scored	(0 eliminated genes)

	 Level 7:	34 nodes to be scored	(0 eliminated genes)

	 Level 6:	51 nodes to be scored	(0 eliminated genes)

	 Level 5:	57 nodes to be scored	(26 eliminated genes)

	 Level 4:	47 nodes to be scored	(26 eliminated genes)

	 Level 3:	26 nodes to be scored	(26 eliminated genes)

	 Level 2:	11 nodes to be scored	(26 eliminated genes)

	 Level 1:	1 nodes to be scored	(26 eliminated genes)

Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 225 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 13:	1 nodes to be scored	(0 eliminated genes)

	 Level 12:	2 nodes to be scored	(0 eliminated genes)

	 Level 11:	4 nodes to be scored	(0 eliminated genes)

	 Level 10:	4 nodes to be scored	(0 eliminated genes)

	 Level 9:	8 nodes to be scored	(0 eliminated genes)

	 Level 8:	14 nodes to be scored	(0 eliminated genes)

	 Level 7:	25 nodes to be scored	(0 eliminated genes)

	 Level 6:	37 nodes to be scored	(0 eliminated genes)

	 Level 5:	53 nodes to be scored	(36 eliminated genes)

	 Level 4:	44 nodes to be scored	(45 eliminated genes)

	 Level 3:	23 nodes to be scored	(45 eliminated genes)

	 Level 2:	9 nodes to be scored	(45 eliminated genes)

	 Level 1:	1 nodes to be scored	(45 eliminated genes)

Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 250 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 12:	4 nodes to be scored	(0 eliminated genes)

	 Level 11:	6 nodes to be scored	(11 eliminated genes)

	 Level 10:	6 nodes to be scored	(11 eliminated genes)

	 Level 9:	12 nodes to be scored	(22 eliminated genes)

	 Level 8:	17 nodes to be scored	(22 eliminated genes)

	 Level 7:	29 nodes to be scored	(31 eliminated genes)

	 Level 6:	45 nodes to be scored	(73 eliminated genes)

	 Level 5:	58 nodes to be scored	(73 eliminated genes)

	 Level 4:	42 nodes to be scored	(92 eliminated genes)

	 Level 3:	22 nodes to be scored	(92 eliminated genes)

	 Level 2:	8 nodes to be scored	(92 eliminated genes)

	 Level 1:	1 nodes to be scored	(92 eliminated genes)

Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 276 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 13:	1 nodes to be scored	(0 eliminated genes)

	 Level 12:	4 nodes to be scored	(0 eliminated genes)

	 Level 11:	6 nodes to be scored	(0 eliminated genes)

	 Level 10:	5 nodes to be scored	(12 eliminated genes)

	 Level 9:	11 nodes to be scored	(12 eliminated genes)

	 Level 8:	19 nodes to be scored	(12 eliminated genes)

	 Level 7:	33 nodes to be scored	(24 eliminated genes)

	 Level 6:	51 nodes to be scored	(28 eliminated genes)

	 Level 5:	60 nodes to be scored	(28 eliminated genes)

	 Level 4:	48 nodes to be scored	(28 eliminated genes)

	 Level 3:	26 nodes to be scored	(38 eliminated genes)

	 Level 2:	11 nodes to be scored	(38 eliminated genes)

	 Level 1:	1 nodes to be scored	(38 eliminated genes)

Building most specific GOs .....
	( 571 GO terms found. )

Build GO DAG topology ..........
	( 1846 GO terms and 4079 relations. )

Annotating nodes ...............
	( 250 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 271 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 13:	1 nodes to be scored	(0 eliminated genes)

	 Level 12:	3 nodes to be scored	(0 eliminated genes)

	 Level 11:	4 nodes to be scored	(0 eliminated genes)

	 Level 10:	5 nodes to be scored	(0 eliminated genes)

	 Level 9:	11 nodes to be scored	(0 eliminated genes)

	 Level 8:	16 nodes to be scored	(0 eliminated genes)

	 Level 7:	34 nodes to be scored	(0 eliminated genes)

	 Level 6:	51 nodes to be scored	(0 eliminated genes)

	 Level 5:	60 nodes to be scored	(0 eliminated genes)

	 Level 4:	49 nodes to be scored	(10 eliminated genes)

	 Level 3:	25 nodes to be scored	(10 eliminated genes)

	 Level 2:	11 nodes to be scored	(10 eliminated genes)

	 Level 1:	1 nodes to be scored	(10 eliminated genes)



In [23]:

    
# Length of every element of enrichedGOs.
lengths(enrichedGOs)



In [15]:

    
# Row 2 is the second element returned by enrichedGOTerms (the retained
# scores), column 1 the first community -- assumes enrichedGOs is the matrix
# produced by the sapply call.
enrichedGOs[[2,1]]









    





	GO:0006914
		0.000111024375808973
	GO:0006605
		4.74925119630649e-05
	GO:0016192
		0.00087861623591962
	GO:0007033
		0.00200880989377714
	GO:0007034
		0.000101229244111927
	GO:0015031
		0.00717492505664814
	GO:0072594
		8.75731737522821e-05



In [17]:

    
# Number of genes in each community.
lengths(g)



In [72]:

    
# Preview the first rows of the distance table read from shortest_path.csv.
head(shortest.path)









    





V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 ⋯ V25 V26 V27 V28 V29 V30 V31 V32 V33 V34

	0 1 2 2 2 2 2 2 2 3 ⋯ 3 3 3 4 5 5 3 4 2 3
	1 0 1 1 1 1 1 1 1 2 ⋯ 2 2 2 3 4 4 2 3 1 2
	2 1 0 1 2 2 2 2 2 3 ⋯ 3 3 3 4 5 5 3 4 2 1
	2 1 1 0 2 2 2 2 2 3 ⋯ 3 3 3 4 5 5 3 4 2 1
	2 1 2 2 0 1 2 2 2 3 ⋯ 3 3 3 4 5 5 3 4 2 3
	2 1 2 2 1 0 1 2 2 3 ⋯ 2 3 3 3 4 4 3 4 2 3



In [ ]:

    
# Semantic similarity among the genes of the first community
# (GOSemSim mgeneSim, "Wang" measure).
mgeneSim(g[[1]], semData=scGO, measure="Wang")



In [78]:

    
# Similarity between two sets of enriched GO terms ("Resnik" measure, "BMA"
# combination).
# NOTE(review): names(enrichedGOs[[i]]) treats each element as a named score
# vector, but the enrichedGOTerms definition in this notebook returns a
# three-element list per community -- presumably written against an earlier
# definition; confirm the indexing before re-running.
mgoSim(names(enrichedGOs[[1]]), names(enrichedGOs[[2]]), semData=scGO, measure="Resnik", combine="BMA")



In [79]:

    
# Same comparison as the previous cell, between elements 1 and 32.
# NOTE(review): index 32 presumably belongs to a run with more communities
# than the six enrichment runs logged above, and the per-element shape differs
# from what the enrichedGOTerms definition in this notebook returns -- confirm
# before re-running.
mgoSim(names(enrichedGOs[[1]]), names(enrichedGOs[[32]]), semData=scGO, measure="Resnik", combine="BMA")



In [55]:

    
# Compare the first two communities ("Wang" measure). With combine=NULL the
# recorded output is a GO-term by GO-term similarity matrix, not one score.
clusterSim(g[[1]], g[[2]], semData=scGO, measure="Wang", combine=NULL)









    





GO:0032049 GO:0006809 GO:0016226 GO:0045429 GO:0055114 GO:1901300 GO:0001402 GO:0006970 GO:0006972 GO:0007232 ⋯ GO:0015918 GO:0035376 GO:0035690 GO:0033617 GO:0016050 GO:0016485 GO:0030433 GO:0031503 GO:0007097 GO:0031578

	GO:0000077 0.121 0.094 0.121 0.050 0.161 0.191 0.311 0.282 0.242 0.371 ⋯    0.094 0.087 0.289 0.072 0.144 0.049 0.280 0.067 0.049 0.112
	GO:0001302 0.170 0.121 0.160 0.062 0.230 0.114 0.274 0.100 0.087 0.220 ⋯    0.131 0.124 0.171 0.094 0.198 0.066 0.175 0.093 0.065 0.069
	GO:0006457 0.129 0.198 0.276 0.093 0.191 0.079 0.211 0.186 0.163 0.172 ⋯    0.100 0.098 0.305 0.156 0.374 0.119 0.120 0.181 0.117 0.045
	GO:0033194 0.036 0.060 0.084 0.028 0.110 0.211 0.129 0.411 0.355 0.177 ⋯    0.061 0.058 0.309 0.048 0.094 0.070 0.174 0.100 0.069 0.014
	GO:0034599 0.073 0.111 0.143 0.059 0.089 0.326 0.250 0.334 0.290 0.351 ⋯    0.053 0.050 0.582 0.087 0.173 0.059 0.296 0.081 0.058 0.030
	GO:0042262 0.249 0.229 0.141 0.143 0.254 0.178 0.278 0.176 0.155 0.326 ⋯    0.079 0.074 0.192 0.058 0.094 0.197 0.422 0.046 0.037 0.126
	GO:0045454 0.160 0.114 0.147 0.176 0.198 0.206 0.478 0.089 0.078 0.395 ⋯    0.119 0.112 0.156 0.090 0.176 0.061 0.166 0.082 0.060 0.125
	GO:0061077 0.106 0.166 0.229 0.079 0.156 0.067 0.176 0.153 0.133 0.142 ⋯    0.083 0.080 0.251 0.128 0.304 0.097 0.101 0.146 0.095 0.038
	GO:0006378 0.151 0.243 0.143 0.137 0.117 0.035 0.073 0.052 0.044 0.059 ⋯    0.032 0.029 0.091 0.054 0.100 0.259 0.197 0.046 0.034 0.082
	GO:0016973 0.081 0.062 0.081 0.035 0.147 0.033 0.068 0.044 0.038 0.053 ⋯    0.348 0.317 0.036 0.025 0.039 0.175 0.105 0.134 0.201 0.046
	GO:0043488 0.085 0.100 0.141 0.137 0.175 0.067 0.166 0.089 0.077 0.135 ⋯    0.053 0.050 0.069 0.043 0.078 0.279 0.106 0.082 0.059 0.085
	GO:0045945 0.138 0.223 0.113 0.432 0.060 0.091 0.111 0.027 0.023 0.092 ⋯    0.018 0.016 0.047 0.030 0.050 0.133 0.112 0.023 0.019 0.102
	GO:1900152 0.085 0.138 0.076 0.195 0.059 0.061 0.116 0.026 0.022 0.096 ⋯    0.017 0.015 0.048 0.030 0.050 0.109 0.196 0.023 0.018 0.226
	GO:1900364 0.095 0.155 0.088 0.198 0.069 0.060 0.122 0.030 0.026 0.099 ⋯    0.019 0.017 0.055 0.033 0.058 0.159 0.127 0.026 0.020 0.145
	GO:0009228 0.265 0.322 0.158 0.198 0.180 0.070 0.127 0.038 0.033 0.105 ⋯    0.058 0.053 0.071 0.046 0.075 0.085 0.170 0.034 0.027 0.071
	GO:0000725 0.258 0.245 0.155 0.144 0.277 0.167 0.249 0.182 0.157 0.288 ⋯    0.081 0.075 0.201 0.063 0.108 0.208 0.412 0.049 0.039 0.121
	GO:0000730 0.171 0.196 0.309 0.109 0.220 0.115 0.188 0.156 0.129 0.206 ⋯    0.061 0.055 0.167 0.309 0.210 0.157 0.281 0.043 0.033 0.096
	GO:0043007 0.257 0.249 0.245 0.144 0.291 0.086 0.170 0.057 0.050 0.140 ⋯    0.084 0.079 0.098 0.163 0.303 0.217 0.308 0.052 0.040 0.190
	GO:0007533 0.201 0.137 0.172 0.079 0.342 0.089 0.180 0.067 0.059 0.148 ⋯    0.100 0.095 0.104 0.064 0.114 0.110 0.207 0.062 0.048 0.072
	GO:0008298 0.036 0.060 0.084 0.028 0.110 0.024 0.060 0.108 0.093 0.048 ⋯    0.215 0.205 0.081 0.048 0.094 0.070 0.036 0.383 0.252 0.014
	GO:0055085 0.186 0.126 0.162 0.067 0.224 0.127 0.285 0.098 0.086 0.235 ⋯    0.530 0.501 0.173 0.100 0.196 0.068 0.188 0.193 0.222 0.078
	GO:0000147 0.133 0.090 0.378 0.052 0.149 0.098 0.202 0.065 0.057 0.166 ⋯    0.095 0.088 0.118 0.302 0.377 0.046 0.142 0.060 0.046 0.124
	GO:0006897 0.034 0.058 0.086 0.026 0.115 0.022 0.059 0.113 0.096 0.046 ⋯    0.316 0.299 0.082 0.046 0.097 0.070 0.033 0.235 0.262 0.012
	GO:0030041 0.083 0.068 0.277 0.037 0.113 0.065 0.141 0.052 0.044 0.109 ⋯    0.066 0.059 0.090 0.540 0.298 0.034 0.097 0.046 0.034 0.082
	GO:0000288 0.125 0.216 0.126 0.119 0.105 0.029 0.062 0.046 0.038 0.049 ⋯    0.027 0.024 0.080 0.044 0.087 0.183 0.285 0.040 0.029 0.109
	GO:0034629 0.030 0.051 0.071 0.024 0.091 0.021 0.051 0.089 0.076 0.040 ⋯    0.176 0.166 0.067 0.041 0.077 0.058 0.030 0.672 0.206 0.012
	GO:0051654 0.034 0.054 0.076 0.027 0.095 0.023 0.055 0.094 0.081 0.044 ⋯    0.196 0.186 0.072 0.045 0.082 0.063 0.034 0.195 0.645 0.013
	GO:0006890 0.052 0.042 0.059 0.020 0.164 0.038 0.095 0.072 0.061 0.071 ⋯    0.413 0.381 0.055 0.034 0.062 0.047 0.057 0.146 0.305 0.022
	GO:0000086 0.115 0.088 0.115 0.045 0.165 0.083 0.195 0.072 0.061 0.152 ⋯    0.093 0.085 0.123 0.065 0.139 0.046 0.127 0.065 0.046 0.186
	GO:0006355 0.170 0.267 0.136 0.354 0.074 0.075 0.142 0.033 0.028 0.119 ⋯    0.022 0.020 0.058 0.037 0.061 0.164 0.136 0.029 0.023 0.135
	GO:0019918 0.132 0.156 0.153 0.080 0.136 0.031 0.073 0.060 0.050 0.056 ⋯    0.033 0.030 0.097 0.050 0.109 0.273 0.212 0.052 0.037 0.074
	GO:0034969 0.198 0.131 0.189 0.080 0.220 0.070 0.132 0.043 0.037 0.107 ⋯    0.064 0.059 0.075 0.149 0.231 0.210 0.288 0.039 0.030 0.167
	GO:0044257 0.186 0.184 0.167 0.104 0.138 0.041 0.085 0.061 0.052 0.070 ⋯    0.038 0.035 0.108 0.064 0.119 0.343 0.610 0.055 0.041 0.259
	GO:0051726 0.164 0.125 0.164 0.186 0.221 0.211 0.516 0.102 0.089 0.421 ⋯    0.127 0.119 0.176 0.098 0.202 0.068 0.170 0.095 0.067 0.179
	GO:0034605 0.074 0.113 0.147 0.059 0.092 0.176 0.255 0.472 0.408 0.414 ⋯    0.054 0.051 0.352 0.088 0.178 0.061 0.253 0.083 0.060 0.030
	GO:0008150 0.091 0.142 0.213 0.061 0.329 0.052 0.148 0.318 0.282 0.120 ⋯    0.161 0.162 0.223 0.116 0.289 0.199 0.080 0.323 0.193 0.030
	GO:0006446 0.184 0.302 0.155 0.375 0.086 0.080 0.153 0.038 0.033 0.129 ⋯    0.026 0.024 0.071 0.047 0.075 0.201 0.181 0.034 0.028 0.175
	GO:0019509 0.266 0.242 0.165 0.144 0.187 0.055 0.111 0.039 0.033 0.087 ⋯    0.051 0.045 0.070 0.042 0.075 0.129 0.174 0.034 0.026 0.066
	GO:0016579 0.168 0.174 0.170 0.091 0.153 0.035 0.081 0.067 0.057 0.064 ⋯    0.039 0.036 0.106 0.057 0.120 0.466 0.323 0.060 0.043 0.124
	GO:1902499 0.099 0.111 0.101 0.247 0.082 0.101 0.142 0.036 0.030 0.113 ⋯    0.022 0.019 0.064 0.036 0.069 0.182 0.167 0.031 0.023 0.131
	GO:0000055 0.046 0.033 0.114 0.019 0.113 0.035 0.075 0.050 0.043 0.058 ⋯    0.285 0.260 0.040 0.090 0.088 0.035 0.050 0.101 0.344 0.031
	GO:0006606 0.042 0.029 0.036 0.018 0.091 0.035 0.065 0.040 0.035 0.052 ⋯    0.332 0.306 0.033 0.025 0.035 0.030 0.047 0.185 0.193 0.023
	GO:0006999 0.068 0.107 0.301 0.054 0.092 0.047 0.113 0.090 0.077 0.091 ⋯    0.051 0.048 0.153 0.384 0.550 0.059 0.069 0.083 0.057 0.077
	GO:0051292 0.048 0.075 0.306 0.041 0.057 0.036 0.078 0.057 0.048 0.063 ⋯    0.034 0.031 0.099 0.620 0.342 0.038 0.051 0.051 0.037 0.061



In [52]:

    
# Similarity between all communities ("Wang" measure, "BMA" combination).
# NOTE(review): the result is stored under the name clusterSim, the same name
# as the function called in an earlier cell; a distinct variable name would
# avoid confusion.
clusterSim <- mclusterSim(g, semData=scGO, measure="Wang", combine="BMA")



In [53]:

    
# Preview the community similarity matrix computed with mclusterSim.
head(clusterSim)









    






	1.000 0.603 0.638 0.574 0.654 0.627
	0.603 1.000 0.715 0.641 0.701 0.748
	0.638 0.715 1.000 0.660 0.726 0.718
	0.574 0.641 0.660 1.000 0.642 0.626
	0.654 0.701 0.726 0.642 1.000 0.676
	0.627 0.748 0.718 0.626 0.676 1.000



In [54]:

    
# Preview the distance table, for comparison with the similarity matrix above.
head(shortest.path)









    





V1 V2 V3 V4 V5 V6

	0 1 1 1 1 1
	1 0 1 2 2 2
	1 1 0 1 1 1
	1 2 1 0 2 2
	1 2 1 2 0 2
	1 2 1 2 2 0



In [5]:

    
# Column labels for the pathway table, in file order.
cols <- c("pathway_name", "enzyme_name", "E.C._reaction_number", "gene_name", "reference")
# Tab-separated pathway table, one directory above the working directory.
pathways <- read.table(file = "../biochemical_pathways.tab", sep = "\t")
names(pathways) <- cols

# Map ORF identifiers to the entries org.Sc.sgdGENENAME holds for them.
# Identifiers are looked up one at a time; when a lookup raises an error,
# try() reports it and that identifier contributes nothing to the result.
# Returns the concatenated lookup results (character(0) for empty input).
toGene <- function(ORFIdentifiers){
    lookupOne <- function(orf){
        found <- character()
        try(
            found <- as.character(org.Sc.sgdGENENAME[orf])
        )
        found
    }
    collected <- character()
    for (orf in ORFIdentifiers){
        collected <- append(collected, lookupOne(orf))
    }
    collected
}

# Map ORF identifiers to the entries org.Sc.sgdPATH holds for them.
# Identifiers are looked up one at a time; when a lookup raises an error,
# try() reports it and that identifier contributes nothing to the result.
# Returns the concatenated lookup results (character(0) for empty input).
toPath <- function(ORFIdentifiers){
    lookupOne <- function(orf){
        found <- character()
        try(
            found <- as.character(org.Sc.sgdPATH[orf])
        )
        found
    }
    collected <- character()
    for (orf in ORFIdentifiers){
        collected <- append(collected, lookupOne(orf))
    }
    collected
}

# Pathway names for a set of ORFs: translate the ORFs with toGene(), then
# return the pathway_name of every row of `pathways` whose gene_name is among
# the translated names (one entry per matching row).
get_pathways <- function(ORFIdentifiers, pathways) {
    wanted <- toGene(ORFIdentifiers)
    matched <- pathways$gene_name %in% wanted
    pathways$pathway_name[matched]
}

# Genes of a set of ORFs that occur in the pathway table: translate the ORFs
# with toGene(), then return the gene_name of every row of `pathways` whose
# gene_name is among the translated names (one entry per matching row).
get_pathway_genes <- function(ORFIdentifiers, pathways) {
    wanted <- toGene(ORFIdentifiers)
    matched <- pathways$gene_name %in% wanted
    pathways$gene_name[matched]
}



In [ ]:

    
# Pathway names and pathway genes for every community.
# lapply (not sapply) keeps both results as lists with one element per
# community; sapply would return a matrix whenever all communities yield the
# same number of hits, and the next cell filters pathway_genes as a list.
pathway_list <- lapply(g, get_pathways, pathways)
pathway_genes <- lapply(g, get_pathway_genes, pathways)



In [140]:

    
# Enrichment for the pathway genes of every community that has at least one.
# The threshold is passed as `cutoff`, the parameter name enrichedGOTerms
# declares (argument matching is case-sensitive, so `cutOff=` is rejected as an
# unused argument). algorithm="classic" is given explicitly, matching the
# "Classic Algorithm" runs in the recorded output.
# NOTE(review): cutOff and correction must already exist in the session; they
# are not assigned in the cells shown here.
enrichedGOsPathway <- sapply(pathway_genes[lengths(pathway_genes) > 0], enrichedGOTerms, allGenes=allGeneNames, 
                      cutoff=cutOff, correction=correction, ont=ont, mapping=mapping, ID=ID, algorithm="classic")









    



Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 92 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 67 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 264 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 209 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 126 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 61 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 221 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 96 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 168 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 44 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 30 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 95 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 178 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 129 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 82 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 191 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 105 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 41 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 80 nontrivial nodes
		 parameters: 
			 test statistic: fisher

Building most specific GOs .....
	( 1679 GO terms found. )

Build GO DAG topology ..........
	( 3637 GO terms and 8228 relations. )

Annotating nodes ...............
	( 1439 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 77 nontrivial nodes
		 parameters: 
			 test statistic: fisher



In [141]:

    
# Pairwise semantic similarity between the GO term sets stored in
# enrichedGOsPathway: entry [j, i] is mgoSim() between the names of element i
# and the names of element j (Wang measure, "BMA" combine).
# seq_along() replaces 1:length() so that an empty enrichedGOsPathway yields
# no iterations instead of the indices c(1, 0).
# NOTE(review): `range` shadows base::range(); the name is kept in case later
# cells rely on it.
range <- seq_along(enrichedGOsPathway)

simsPathway <- sapply(range, function(i) sapply(range, function(j) 
                    mgoSim(names(enrichedGOsPathway[[i]]),
                        names(enrichedGOsPathway[[j]]),
                        semData=scGO, measure="Wang", combine="BMA")))



In [142]:

    
# Preview the first rows of simsPathway.
head(simsPathway)









    






	1.000 0.763 0.843 0.824 0.859 0.763 0.836 0.772 0.843 0.524 0.579 0.743 0.801 0.727 0.790 0.791 0.788 0.628 0.871 0.665
	0.763 1.000 0.769 0.707 0.719 0.742 0.718 0.745 0.822 0.584 0.727 0.804 0.856 0.649 0.631 0.653 0.707 0.466 0.723 0.565
	0.843 0.769 1.000 0.961 0.765 0.801 0.930 0.788 0.844 0.534 0.570 0.713 0.862 0.669 0.827 0.829 0.898 0.450 0.886 0.678
	0.824 0.707 0.961 1.000 0.741 0.763 0.926 0.760 0.823 0.509 0.533 0.664 0.828 0.659 0.857 0.828 0.902 0.448 0.872 0.675
	0.859 0.719 0.765 0.741 1.000 0.595 0.740 0.618 0.772 0.453 0.481 0.657 0.775 0.700 0.710 0.755 0.686 0.590 0.724 0.558
	0.763 0.742 0.801 0.763 0.595 1.000 0.783 0.746 0.773 0.550 0.662 0.667 0.722 0.558 0.671 0.619 0.776 0.406 0.785 0.697



In [118]:

    
# Preview the first rows of shortest.path (created outside this section).
head(shortest.path)









    





V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 ⋯ V22 V23 V24 V25 V26 V27 V28 V29 V30 V31

	0 1 1 1 1 1 2 2 2 3 ⋯ 4 4 5 4 5 5 6 6 7 7
	1 0 1 2 2 2 3 3 2 4 ⋯ 5 5 6 5 6 6 7 7 8 8
	1 1 0 1 1 2 2 3 1 3 ⋯ 5 5 5 4 5 5 6 6 7 7
	1 2 1 0 2 1 1 2 1 2 ⋯ 4 4 4 3 4 4 5 5 6 6
	1 2 1 2 0 2 3 3 2 4 ⋯ 5 5 6 5 6 6 7 7 8 8
	1 2 2 1 2 0 1 1 2 2 ⋯ 3 3 4 3 4 4 5 5 6 6



In [151]:

    
# Inspect the first element of enrichedGOs; the output shows GO identifiers
# paired with numeric scores.
enrichedGOs[[1]]









    





	GO:0006508
		0.0305666626725701
	GO:0009057
		0.0268263523123756
	GO:0009896
		0.0275243318688756
	GO:0019538
		0.037645201260442
	GO:0030163
		0.038000496959972
	GO:0035966
		0.0369152752689263
	GO:0035967
		0.0232568990511012
	GO:0042176
		0.0192941639335425
	GO:0043632
		0.0317229083283707
	GO:0070646
		0.00379354097137652
	GO:0070647
		0.0151356737247353



In [ ]:

    
# All-against-all gene comparison: for every ordered pair of entries in
# allGenes, call geneSim() with the scGO semantic data and "BMA" combine.
# The outer sapply() supplies the first gene, the inner one the second.
geneSimilarities <- sapply(allGenes, function(firstGene) {
    sapply(allGenes, function(secondGene) {
        geneSim(firstGene, secondGene, semData=scGO, combine="BMA")
    })
})



In [ ]:

    
# Display the result of the all-against-all geneSim() computation.
geneSimilarities



In [6]:

    
# Threshold applied to the Benjamini-Hochberg adjusted p-values below.
cutOff <- 0.05

# Cache file name built from file, p, the cut-off and the ontology.
filename <- sprintf("%s-%s-%s-%s.rda", file, p, cutOff, ont)

if (file.exists(filename)){
    
    # Reuse a previous run; the file is expected to hold the objects written
    # by the save() call at the end of the else branch.
    print(sprintf("loading: %s", filename))
    load(filename)
    print("loaded")
    
} else {
    
    print("creating topGO objects")

    # One slot per community, preallocated.
    geneLists <- vector("list", numCom) 
    GOdataObjects <- vector("list", numCom) 
    resultFishers <- vector("list", numCom) 
    results <- vector("list", numCom) 
    gos <- vector("list", numCom) 

    #perform enrichment analyses
    # seq_len() instead of 1:numCom: with numCom == 0 the latter would iterate
    # over c(1, 0). NOTE(review): the loop variable `c` shadows base::c as a
    # variable; the name is kept so the workspace is unchanged after the cell.
    for (c in seq_len(numCom)){

        #factor of interesting genes
        # 1 marks genes belonging to community c, 0 all other genes.
        geneList <- factor(as.integer(allGenes %in% g[[c]]))
        names(geneList) <- allGenes
        geneLists[[c]] <- geneList

        #construct topGO object
        GOdata <- new("topGOdata", description=sprintf("topGO object for community %s", c),
                      ontology = ont, allGenes = geneList,
                      annotationFun = annFUN.org, mapping = mapping, 
                      ID = ID, nodeSize = 10)
        GOdataObjects[[c]] <- GOdata

        #fishers exact test classic
        resultFisher <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
        resultFishers[[c]] <- resultFisher

        #tabulate results
        allRes <- GenTable(GOdata, classicFisher = resultFisher,
                      orderBy = "classicFisher")
        results[[c]] <- allRes
        
        #go terms < cut off  Benjamini-Hochberg multiple hypothesis corrected pval
        # Extract the raw scores once; keep the raw values of the terms whose
        # adjusted value passes the cut-off. which() drops NA comparisons.
        pvals <- score(resultFisher)
        gos[[c]] <- pvals[which(p.adjust(pvals, method="BH") <= cutOff)]

        print(sprintf("community %s complete", c))
    }
    
    print(sprintf("Saving data: %s", filename))
    save(geneLists, GOdataObjects, resultFishers, results, gos, file=filename)
    print("saved")
}









    



[1] "creating topGO objects"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 355 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 1 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 581 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 2 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 835 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 3 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 681 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 4 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 586 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 5 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 644 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 6 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 877 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 7 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 706 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 8 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 831 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 9 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 408 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 10 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 516 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 11 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 567 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 12 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 706 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 13 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 531 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 14 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 473 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 15 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 612 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 16 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 577 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 17 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 501 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 18 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 463 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 19 complete"






    



Building most specific GOs .....
	( 1689 GO terms found. )

Build GO DAG topology ..........
	( 3643 GO terms and 8240 relations. )

Annotating nodes ...............
	( 1558 genes annotated to the GO terms. )

			 -- Classic Algorithm -- 

		 the algorithm is scoring 602 nontrivial nodes
		 parameters: 
			 test statistic: fisher






    



[1] "community 20 complete"
[1] "Saving data: yeast_union-0.7-0.05-BP.rda"
[1] "saved"



In [7]:

    
# Append the accession part of each GO identifier to a text file.
#
# terms: vector whose names are identifiers such as "GO:0006508"; only the
#        names are used.
# file:  passed on to write(); output is appended, one accession per line.
#
# Each name is split on ":" and the second piece ("0006508") is written.
# All lines go out in a single write() call instead of one call per term,
# and input without names (e.g. an unnamed empty vector) is a no-op rather
# than an error from strsplit(NULL). Returns NULL invisibly.
print_accession_number <- function(terms, file){
    ids <- names(terms)
    if (length(ids) == 0) {
        return(invisible(NULL))
    }
    accessions <- vapply(strsplit(ids, ":"), function(s) s[2], character(1))
    write(accessions, file=file, ncolumns=1, append=TRUE)
    invisible(NULL)
}



In [8]:

    
###write accession number to file
# One file per element of gos, named after the cut-off, the ontology and the
# element index. seq_along() avoids iterating over c(1, 0) when gos is empty.
# NOTE: print_accession_number() appends, so re-running this cell adds the
# same lines to existing files again.
for (i in seq_along(gos)){
    accessionFile <- sprintf("accession_numbers-%s-%s-%s", cutOff, ont, i)
    print_accession_number(gos[[i]], file=accessionFile)
}



In [10]:

    
# Pairwise semantic similarity among allGenes via mgeneSim(), using the scGO
# semantic data, the "Wang" measure and "BMA" combine; verbose=TRUE shows the
# progress bar seen in the output.
wangAllGeneSim <- mgeneSim(allGenes, semData=scGO, measure="Wang", combine="BMA", verbose=TRUE)









    



  |======================================================================| 100%



In [11]:

    
# Turn similarities into distances with -log(similarity) (similarity 1 gives
# distance 0), cluster hierarchically, then cut the tree into numCom groups.
# NOTE(review): a similarity of exactly 0 would give an infinite distance;
# confirm none occur in wangAllGeneSim.
clusters <- hclust(as.dist(-log(wangAllGeneSim)))
clusterCut <- cutree(clusters, numCom)



In [12]:

    
# Plot the hierarchical clustering result.
plot(clusters)



In [13]:

    
# Community index for every gene in allGenes; 0 means the gene appears in
# none of the communities in g.
assignedCommunities <- numeric(length(allGenes))
names(assignedCommunities) <- allGenes

# seq_len() instead of 1:numCom so that numCom == 0 gives no iterations
# rather than the indices c(1, 0). A gene listed in several communities ends
# up with the highest community index, because later assignments overwrite
# earlier ones.
for (i in seq_len(numCom)){
    for (geneName in g[[i]]){
        assignedCommunities[geneName] <- i
    }
}



In [14]:

    
library(NMI)



In [15]:

    
# Keep only the genes that also have a cluster label in clusterCut, so both
# labelings can be compared on the same names.
assignedCommunities <- assignedCommunities[names(assignedCommunities) %in% names(clusterCut)]



In [16]:

    
# Two-column data frame for the NMI() call: the row names (gene names) are
# copied into a leading Row.Names column, followed by the community label.
assignedCommunitiesDF <- data.frame(assignedCommunities)
assignedCommunitiesDF <- cbind(Row.Names = rownames(assignedCommunitiesDF), assignedCommunitiesDF)



In [17]:

    
# Same layout for the hierarchical clustering labels: a leading Row.Names
# column copied from the row names, followed by the cluster label.
clusterCutDF <- data.frame(clusterCut)
clusterCutDF <- cbind(Row.Names = rownames(clusterCutDF), clusterCutDF)



In [18]:

    
# Compare the community assignment with the similarity-based clustering using
# NMI() from the NMI package (presumably normalized mutual information; the
# result is reported under $value).
NMI(assignedCommunitiesDF, clusterCutDF)









    




$value = 0.0711074542082947



In [98]:

    
# Score each GO term by how often it is an ancestor of the other input terms.
#
# namedTerms:  vector whose names are GO identifiers (the values are unused).
# ancestorMap: mapping from a GO identifier to its ancestor identifiers.
#              Defaults to GOBPANCESTOR as before; another ancestor map (e.g.
#              GOMFANCESTOR, GOCCANCESTOR) or a plain named list may be given.
#
# Returns a named numeric vector sorted in decreasing order: for each term,
# the number of input terms listing it as an ancestor, divided by the largest
# such count (so the top term scores 1). If no input term is an ancestor of
# another, all scores are 0.
most_representative_term_weighted <- function(namedTerms, ancestorMap = GOBPANCESTOR){
    
    terms <- names(namedTerms)
    counts <- numeric(length(namedTerms))
    names(counts) <- terms

    if (length(terms) == 0) {
        return (counts)
    }

    # A plain list is wrapped in an environment so one mget() call serves
    # both kinds of map.
    if (is.list(ancestorMap)) {
        ancestorMap <- list2env(ancestorMap)
    }
    # Single batched lookup; identifiers missing from the map yield NA
    # (subsetting the map by an unknown key is expected to fail instead;
    # TODO confirm).
    ancestors <- mget(terms, envir = ancestorMap, ifnotfound = NA)

    for (termAncestors in ancestors) {
        matched <- termAncestors[termAncestors %in% terms]
        counts[matched] <- counts[matched] + 1
    }

    # With all counts at 0, dividing by max(counts) gives NaN, which sort()
    # silently drops; keep the zeros instead.
    top <- max(counts)
    if (top > 0) {
        counts <- counts / top
    }
    return (sort(counts, decreasing=TRUE))
}



In [33]:

    
# Pick the input GO term that is an ancestor of the most other input terms.
#
# namedTerms:  vector whose names are GO identifiers (the values are unused).
# ancestorMap: mapping from a GO identifier to its ancestor identifiers.
#              Defaults to GOBPANCESTOR as before; another ancestor map (e.g.
#              GOMFANCESTOR, GOCCANCESTOR) or a plain named list may be given.
#
# Returns a single identifier (the first one on ties), or NA_character_ when
# no input term is an ancestor of another. Previously that NA came about only
# because sort() dropped the NaN values produced by dividing by a zero sum.
most_representative_term_ancestor <- function(namedTerms, ancestorMap = GOBPANCESTOR){
    
    terms <- names(namedTerms)
    if (length(terms) == 0) {
        return (NA_character_)
    }

    counts <- numeric(length(namedTerms))
    names(counts) <- terms

    # A plain list is wrapped in an environment so one mget() call serves
    # both kinds of map.
    if (is.list(ancestorMap)) {
        ancestorMap <- list2env(ancestorMap)
    }
    # Single batched lookup; identifiers missing from the map yield NA
    # (subsetting the map by an unknown key is expected to fail instead;
    # TODO confirm).
    ancestors <- mget(terms, envir = ancestorMap, ifnotfound = NA)

    for (termAncestors in ancestors) {
        matched <- termAncestors[termAncestors %in% terms]
        counts[matched] <- counts[matched] + 1
    }

    if (max(counts) == 0) {
        return (NA_character_)
    }
    # Normalising by sum(counts) does not change the ranking, so the largest
    # raw count is taken directly.
    return (names(counts)[which.max(counts)])
}



In [34]:

    
# One representative term per non-empty element of gos; Filter(length, gos)
# drops the elements with no terms.
representativeTermsAncestor <- sapply(Filter(length, gos), most_representative_term_ancestor)



In [35]:

    
# Look up the TERM and DEFINITION columns in GO.db for the representative
# terms (no keytype is given; the output shows them matched as GOID).
select(GO.db, keys=representativeTermsAncestor, columns=c("TERM", "DEFINITION"))









    



'select()' returned many:1 mapping between keys and columns






    





GOID TERM DEFINITION

	GO:0006644                                                                                                                                                                                                                                                                                                                                                                                                          phospholipid metabolic process                                                                                                                                                                                                                                                                                                                                                                                      The chemical reactions and pathways involving phospholipids, any lipid containing phosphoric acid as a mono- or diester.                                                                                                                                                                                                                                                                                            
	GO:0007049                                                                                                                                                                                                                                                                                                                                                                                                          cell cycle                                                                                                                                                                                                                                                                                                                                                                                                          The progression of biochemical and morphological phases and events that occur in a cell during successive cell replication or nuclear replication events. Canonically, the cell cycle comprises the replication and segregation of genetic material followed by the division of the cell, but in endocycles or syncytial cells nuclear replication or nuclear division may not be followed by cell division.        
	GO:0044699                                                                                                                                                                                                                                                                                                                                                                                                          single-organism process                                                                                                                                                                                                                                                                                                                                                                                             A biological process that involves only one organism.                                                                                                                                                                                                                                                                                                                                                               
	GO:0044710                                                                                                                                                                                                                                                                                                                                                                                                          single-organism metabolic process                                                                                                                                                                                                                                                                                                                                                                                   A metabolic process - chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances - which involves a single organism.                                                                                                                                                                                                                              
	GO:0051128                                                                                                                                                                                                                                                                                                                                                                                                          regulation of cellular component organization                                                                                                                                                                                                                                                                                                                                                                       Any process that modulates the frequency, rate or extent of a process involved in the formation, arrangement of constituent parts, or disassembly of cell structures, including the plasma membrane and any external encapsulating structures such as the cell wall and cell envelope.                                                                                                                              
	GO:0034641                                                                                                                                                                                                                                                                                                                                                                                                          cellular nitrogen compound metabolic process                                                                                                                                                                                                                                                                                                                                                                        The chemical reactions and pathways involving various organic and inorganic nitrogenous compounds, as carried out by individual cells.                                                                                                                                                                                                                                                                              
	GO:0006725                                                                                                                                                                                                                                                                                                                                                                                                          cellular aromatic compound metabolic process                                                                                                                                                                                                                                                                                                                                                                        The chemical reactions and pathways involving aromatic compounds, any organic compound characterized by one or more planar rings, each of which contains conjugated double bonds and delocalized pi electrons, as carried out by individual cells.                                                                                                                                                                  
	GO:0044710                                                                                                                                                                                                                                                                                                                                                                                                          single-organism metabolic process                                                                                                                                                                                                                                                                                                                                                                                   A metabolic process - chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances - which involves a single organism.                                                                                                                                                                                                                              
	GO:0044238                                                                                                                                                                                                                                                                                                                                                                                                          primary metabolic process                                                                                                                                                                                                                                                                                                                                                                                           The chemical reactions and pathways involving those compounds which are formed as a part of the normal anabolic and catabolic processes. These processes take place in most, if not all, cells of the organism.                                                                                                                                                                                                     
	GO:0044699                                                                                                                                                                                                                                                                                                                                                                                                          single-organism process                                                                                                                                                                                                                                                                                                                                                                                             A biological process that involves only one organism.                                                                                                                                                                                                                                                                                                                                                               
	GO:0005975                                                                                                                                                                                                                                                                                                                                                                                                          carbohydrate metabolic process                                                                                                                                                                                                                                                                                                                                                                                      The chemical reactions and pathways involving carbohydrates, any of a group of organic compounds based of the general formula Cx(H2O)y. Includes the formation of carbohydrate derivatives by the addition of a carbohydrate residue to another molecule.                                                                                                                                                           
	GO:0044710                                                                                                                                                                                                                                                                                                                                                                                                          single-organism metabolic process                                                                                                                                                                                                                                                                                                                                                                                   A metabolic process - chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances - which involves a single organism.                                                                                                                                                                                                                              
	GO:0030029                                                                                                                                                                                                                                                                                                                                                                                                          actin filament-based process                                                                                                                                                                                                                                                                                                                                                                                        Any cellular process that depends upon or alters the actin cytoskeleton, that part of the cytoskeleton comprising actin filaments and their associated proteins.                                                                                                                                                                                                                                                    
	GO:0051726                                                                                                                                                                                                                                                                                                                                                                                                          regulation of cell cycle                                                                                                                                                                                                                                                                                                                                                                                            Any process that modulates the rate or extent of progression through the cell cycle.                                                                                                                                                                                                                                                                                                                                
	GO:0016070                                                                                                                                                                                                                                                                                                                                                                                                          RNA metabolic process                                                                                                                                                                                                                                                                                                                                                                                               The cellular chemical reactions and pathways involving RNA, ribonucleic acid, one of the two main type of nucleic acid, consisting of a long, unbranched macromolecule formed from ribonucleotides joined in 3',5'-phosphodiester linkage.                                                                                                                                                                          
	GO:0044085                                                                                                                                                                                                                                                                                                                                                                                                          cellular component biogenesis                                                                                                                                                                                                                                                                                                                                                                                       A process that results in the biosynthesis of constituent macromolecules, assembly, and arrangement of constituent parts of a cellular component. Includes biosynthesis of constituent macromolecules, and those macromolecular modifications that are involved in synthesis or assembly of the cellular component.                                                                                                 
	GO:0044763                                                                                                                                                                                                                                                                                                                                                                                                          single-organism cellular process                                                                                                                                                                                                                                                                                                                                                                                    Any process that is carried out at the cellular level, occurring within a single organism.                                                                                                                                                                                                                                                                                                                          
	GO:0048518                                                                                                                                                                                                                                                                                                                                                                                                          positive regulation of biological process                                                                                                                                                                                                                                                                                                                                                                           Any process that activates or increases the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.                                                                                                                                         
	GO:0016070                                                                                                                                                                                                                                                                                                                                                                                                          RNA metabolic process                                                                                                                                                                                                                                                                                                                                                                                               The cellular chemical reactions and pathways involving RNA, ribonucleic acid, one of the two main type of nucleic acid, consisting of a long, unbranched macromolecule formed from ribonucleotides joined in 3',5'-phosphodiester linkage.                                                                                                                                                                          
	GO:0051716                                                                                                                                                                                                                                                                                                                                                                                                          cellular response to stimulus                                                                                                                                                                                                                                                                                                                                                                                       Any process that results in a change in state or activity of a cell (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a stimulus. The process begins with detection of the stimulus by a cell and ends with a change in state or activity or the cell.                                                                                                                     
	GO:0009893                                                                                                                                                                                                                                                                                                                                                                                                          positive regulation of metabolic process                                                                                                                                                                                                                                                                                                                                                                            Any process that activates or increases the frequency, rate or extent of the chemical reactions and pathways within a cell or an organism.                                                                                                                                                                                                                                                                          
	GO:0009056                                                                                                                                                                                                                                                                                                                                                                                                          catabolic process                                                                                                                                                                                                                                                                                                                                                                                                   The chemical reactions and pathways resulting in the breakdown of substances, including the breakdown of carbon compounds with the liberation of energy for use by the cell or organism.                                                                                                                                                                                                                            
	GO:0042221                                                                                                                                                                                                                                                                                                                                                                                                          response to chemical                                                                                                                                                                                                                                                                                                                                                                                                Any process that results in a change in state or activity of a cell or an organism (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a chemical stimulus.                                                                                                                                                                                                                  
	GO:0051179                                                                                                                                                                                                                                                                                                                                                                                                          localization                                                                                                                                                                                                                                                                                                                                                                                                        Any process in which a cell, a substance, or a cellular entity, such as a protein complex or organelle, is transported, tethered to or otherwise maintained in a specific location. In the case of substances, localization may also be achieved via selective degradation.                                                                                                                                         
	GO:0061024                                                                                                                                                                                                                                                                                                                                                                                                          membrane organization                                                                                                                                                                                                                                                                                                                                                                                               A process which results in the assembly, arrangement of constituent parts, or disassembly of a membrane. A membrane is a double layer of lipid molecules that encloses all cells, and, in eukaryotes, many organelles; may be a single or double lipid bilayer; also includes associated proteins.                                                                                                                  
	GO:0050789                                                                                                                                                                                                                                                                                                                                                                                                          regulation of biological process                                                                                                                                                                                                                                                                                                                                                                                    Any process that modulates the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.                                                                                                                                                      
	GO:0006629                                                                                                                                                                                                                                                                                                                                                                                                          lipid metabolic process                                                                                                                                                                                                                                                                                                                                                                                             The chemical reactions and pathways involving lipids, compounds soluble in an organic solvent but not, or sparingly, in an aqueous solvent. Includes fatty acids; neutral fats, other fatty-acid esters, and soaps; long-chain (fatty) alcohols and waxes; sphingoids and other long-chain bases; glycolipids, phospholipids and sphingolipids; and carotenes, polyprenols, sterols, terpenes and other isoprenoids.
	GO:0065007                                                                                                                                                                                                                                                                                                                                                                                                          biological regulation                                                                                                                                                                                                                                                                                                                                                                                               Any process that modulates a measurable attribute of any biological process, quality or function.                                                                                                                                                                                                                                                                                                                   
	GO:0018130                                                                                                                                                                                                                                                                                                                                                                                                          heterocycle biosynthetic process                                                                                                                                                                                                                                                                                                                                                                                    The chemical reactions and pathways resulting in the formation of heterocyclic compounds, those with a cyclic molecular structure and at least two different atoms in the ring (or rings).                                                                                                                                                                                                                          
	GO:0008152                                                                                                                                                                                                                                                                                                                                                                                                          metabolic process                                                                                                                                                                                                                                                                                                                                                                                                   The chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances. Metabolic processes typically transform small molecules, but also include macromolecular processes such as DNA repair and replication, and protein synthesis and degradation.                                                                                                     
	GO:0007031                                                                                                                                                                                                                                                                                                                                                                                                          peroxisome organization                                                                                                                                                                                                                                                                                                                                                                                             A process that is carried out at the cellular level which results in the assembly, arrangement of constituent parts, or disassembly of a peroxisome. A peroxisome is a small, membrane-bounded organelle that uses dioxygen (O2) to oxidize organic molecules.                                                                                                                                                      
	GO:0065007                                                                                                                                                                                                                                                                                                                                                                                                          biological regulation                                                                                                                                                                                                                                                                                                                                                                                               Any process that modulates a measurable attribute of any biological process, quality or function.                                                                                                                                                                                                                                                                                                                   
	GO:0051179                                                                                                                                                                                                                                                                                                                                                                                                          localization                                                                                                                                                                                                                                                                                                                                                                                                        Any process in which a cell, a substance, or a cellular entity, such as a protein complex or organelle, is transported, tethered to or otherwise maintained in a specific location. In the case of substances, localization may also be achieved via selective degradation.                                                                                                                                         
	GO:0071840                                                                                                                                                                                                                                                                                                                                                                                                          cellular component organization or biogenesis                                                                                                                                                                                                                                                                                                                                                                       A process that results in the biosynthesis of constituent macromolecules, assembly, arrangement of constituent parts, or disassembly of a cellular component.                                                                                                                                                                                                                                                       
	GO:0071840                                                                                                                                                                                                                                                                                                                                                                                                          cellular component organization or biogenesis                                                                                                                                                                                                                                                                                                                                                                       A process that results in the biosynthesis of constituent macromolecules, assembly, arrangement of constituent parts, or disassembly of a cellular component.                                                                                                                                                                                                                                                       
	GO:0008152                                                                                                                                                                                                                                                                                                                                                                                                          metabolic process                                                                                                                                                                                                                                                                                                                                                                                                   The chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances. Metabolic processes typically transform small molecules, but also include macromolecular processes such as DNA repair and replication, and protein synthesis and degradation.                                                                                                     
	GO:0008152                                                                                                                                                                                                                                                                                                                                                                                                          metabolic process                                                                                                                                                                                                                                                                                                                                                                                                   The chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances. Metabolic processes typically transform small molecules, but also include macromolecular processes such as DNA repair and replication, and protein synthesis and degradation.                                                                                                     
	GO:0050789                                                                                                                                                                                                                                                                                                                                                                                                          regulation of biological process                                                                                                                                                                                                                                                                                                                                                                                    Any process that modulates the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.                                                                                                                                                      
	GO:0043603                                                                                                                                                                                                                                                                                                                                                                                                          cellular amide metabolic process                                                                                                                                                                                                                                                                                                                                                                                    The chemical reactions and pathways involving an amide, any derivative of an oxoacid in which an acidic hydroxy group has been replaced by an amino or substituted amino group, as carried out by individual cells.                                                                                                                                                                                                 
	GO:0009132                                                                                                                                                                                                                                                                                                                                                                                                          nucleoside diphosphate metabolic process                                                                                                                                                                                                                                                                                                                                                                            The chemical reactions and pathways involving a nucleoside diphosphate, a compound consisting of a nucleobase linked to a deoxyribose or ribose sugar esterified with diphosphate on the sugar.                                                                                                                                                                                                                     
	GO:0009058                                                                                                                                                                                                                                                                                                                                                                                                          biosynthetic process                                                                                                                                                                                                                                                                                                                                                                                                The chemical reactions and pathways resulting in the formation of substances; typically the energy-requiring part of metabolism in which simpler substances are transformed into more complex ones.                                                                                                                                                                                                                 
	GO:0051179                                                                                                                                                                                                                                                                                                                                                                                                          localization                                                                                                                                                                                                                                                                                                                                                                                                        Any process in which a cell, a substance, or a cellular entity, such as a protein complex or organelle, is transported, tethered to or otherwise maintained in a specific location. In the case of substances, localization may also be achieved via selective degradation.                                                                                                                                         
	GO:0048519                                                                                                                                                                                                                                                                                                                                                                                                          negative regulation of biological process                                                                                                                                                                                                                                                                                                                                                                           Any process that stops, prevents, or reduces the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.                                                                                                                                    
	GO:1901360                                                                                                                                                                                                                                                                                                                                                                                                          organic cyclic compound metabolic process                                                                                                                                                                                                                                                                                                                                                                           The chemical reactions and pathways involving organic cyclic compound.                                                                                                                                                                                                                                                                                                                                              
	GO:0022402                                                                                                                                                                                                                                                                                                                                                                                                          cell cycle process                                                                                                                                                                                                                                                                                                                                                                                                  The cellular process that ensures successive accurate and complete genome replication and chromosome segregation.                                                                                                                                                                                                                                                                                                   
	GO:0007163                                                                                                                                                                                                                                                                                                                                                                                                          establishment or maintenance of cell polarity                                                                                                                                                                                                                                                                                                                                                                       Any cellular process that results in the specification, formation or maintenance of anisotropic intracellular organization or cell growth patterns.                                                                                                                                                                                                                                                                 
	GO:0043170                                                                                                                                                                                                                                                                                                                                                                                                          macromolecule metabolic process                                                                                                                                                                                                                                                                                                                                                                                     The chemical reactions and pathways involving macromolecules, any molecule of high relative molecular mass, the structure of which essentially comprises the multiple repetition of units derived, actually or conceptually, from molecules of low relative molecular mass.                                                                                                                                         
	GO:0009056                                                                                                                                                                                                                                                                                                                                                                                                          catabolic process                                                                                                                                                                                                                                                                                                                                                                                                   The chemical reactions and pathways resulting in the breakdown of substances, including the breakdown of carbon compounds with the liberation of energy for use by the cell or organism.                                                                                                                                                                                                                            
	GO:0043170                                                                                                                                                                                                                                                                                                                                                                                                          macromolecule metabolic process                                                                                                                                                                                                                                                                                                                                                                                     The chemical reactions and pathways involving macromolecules, any molecule of high relative molecular mass, the structure of which essentially comprises the multiple repetition of units derived, actually or conceptually, from molecules of low relative molecular mass.                                                                                                                                         
	GO:0051716                                                                                                                                                                                                                                                                                                                                                                                                          cellular response to stimulus                                                                                                                                                                                                                                                                                                                                                                                       Any process that results in a change in state or activity of a cell (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a stimulus. The process begins with detection of the stimulus by a cell and ends with a change in state or activity or the cell.                                                                                                                     
	GO:0016043                                                                                                                                                                                                                                                                                                                                                                                                          cellular component organization                                                                                                                                                                                                                                                                                                                                                                                     A process that results in the assembly, arrangement of constituent parts, or disassembly of a cellular component.                                                                                                                                                                                                                                                                                                   
	GO:0009058                                                                                                                                                                                                                                                                                                                                                                                                          biosynthetic process                                                                                                                                                                                                                                                                                                                                                                                                The chemical reactions and pathways resulting in the formation of substances; typically the energy-requiring part of metabolism in which simpler substances are transformed into more complex ones.



In [36]:

    
# Wang semantic similarity of the ancestor-based representative terms against
# themselves. combine = NULL is passed so no combining strategy is applied;
# the preview printed in the next cell is a term-by-term matrix.
simsGOAncestor <- mgoSim(
    representativeTermsAncestor,
    representativeTermsAncestor,
    semData = scGO,
    measure = "Wang",
    combine = NULL
)



In [37]:

    
# Preview the first rows of the ancestor-based term similarity matrix.
head(simsGOAncestor)









    





GO:0006644 GO:0007049 GO:0044699 GO:0044710 GO:0051128 GO:0034641 GO:0006725 GO:0044238 GO:0005975 GO:0030029 ⋯ GO:0071840 GO:0043603 GO:0009132 GO:0009058 GO:0048519 GO:1901360 GO:0022402 GO:0007163 GO:0043170 GO:0016043

	GO:0006644 1.000 0.354 0.239 0.405 0.130 0.334 0.354 0.323 0.362 0.354 ⋯    0.106 0.294 0.618 0.197 0.087 0.281 0.339 0.354 0.281 0.184
	GO:0007049 0.354 1.000 0.547 0.379 0.245 0.289 0.321 0.191 0.139 0.722 ⋯    0.243 0.256 0.291 0.191 0.180 0.156 0.872 0.722 0.156 0.379
	GO:0044699 0.239 0.547 1.000 0.643 0.178 0.212 0.243 0.340 0.236 0.547 ⋯    0.444 0.192 0.198 0.340 0.304 0.276 0.493 0.547 0.276 0.286
	GO:0044710 0.405 0.379 0.643 1.000 0.129 0.340 0.379 0.507 0.371 0.379 ⋯    0.286 0.305 0.336 0.507 0.210 0.419 0.349 0.379 0.419 0.198
	GO:0051128 0.130 0.245 0.178 0.129 1.000 0.225 0.245 0.142 0.107 0.245 ⋯    0.400 0.198 0.107 0.142 0.447 0.117 0.229 0.245 0.117 0.651
	GO:0034641 0.334 0.289 0.212 0.340 0.225 1.000 0.649 0.379 0.283 0.289 ⋯    0.212 0.888 0.433 0.379 0.161 0.314 0.268 0.289 0.314 0.340



In [38]:

    
# Preview the first rows of shortest.path (defined outside this section).
# NOTE(review): presumably the pairwise shortest-path distances that the
# similarity matrix above is compared against -- confirm.
head(shortest.path)









    





V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 ⋯ V43 V44 V45 V46 V47 V48 V49 V50 V51 V52

	0 1 1 1 1 2 2 1 2 2 ⋯ 5 3 5 2 4 3 3 3 3 3
	1 0 1 1 1 1 2 2 1 2 ⋯ 5 3 5 2 4 3 3 3 2 2
	1 1 0 2 2 2 3 2 2 3 ⋯ 6 2 6 3 5 4 2 2 3 3
	1 1 2 0 2 2 3 2 2 3 ⋯ 6 4 6 3 5 4 4 4 3 3
	1 1 2 2 0 1 1 1 1 1 ⋯ 4 4 4 1 3 2 4 4 2 2
	2 1 2 2 1 0 1 2 2 2 ⋯ 5 4 5 2 4 3 4 4 1 1



In [15]:

    
# Score one GO term by its Resnik similarity with itself, using the scGO
# semantic data prepared elsewhere in the notebook.
# NOTE(review): presumably this self-similarity is the term's information
# content as GOSemSim computes it -- confirm against the GOSemSim docs.
#
# term: a single GO identifier.
# Returns whatever goSim() yields for the (term, term) pair.
information_content <- function(term) {
    self_similarity <- goSim(term, term, measure = "Resnik", semData = scGO)
    self_similarity
}

# Pick the GO term with the highest information_content() score.
#
# namedTerms: a vector whose *names* are the GO identifiers to compare; the
#   values themselves are not used here.
# Returns a length-one character vector holding the best-scoring GO id.
#   Terms whose score is NA are ignored, and ties go to the term that comes
#   first in names(namedTerms). NA_character_ is returned when there are no
#   names to score or when every score is NA.
most_representative_term_ic <- function(namedTerms) {
    term_ids <- names(namedTerms)

    # An empty or unnamed input has nothing to rank; answer NA instead of
    # failing further down.
    if (length(term_ids) == 0) {
        return(NA_character_)
    }

    # vapply() guarantees one numeric score per term, whatever the input size.
    ics <- vapply(term_ids, information_content, numeric(1))

    if (all(is.na(ics))) {
        return(NA_character_)
    }

    # which.max() skips NA scores and returns the first maximum.
    term_ids[which.max(ics)]
}



In [16]:

    
# For every community that has at least one GO term in gos, keep the single
# term selected by most_representative_term_ic(). vapply() pins the result to
# a character vector (one id per community) even when no community qualifies.
representativeTermsIC <- vapply(
    Filter(function(terms) length(terms) > 0, gos),
    most_representative_term_ic,
    character(1)
)



In [17]:

    
# Look up the term name and definition of each IC-based representative term
# in GO.db.
select(
    GO.db,
    keys = representativeTermsIC,
    columns = c("TERM", "DEFINITION")
)









    



'select()' returned 1:1 mapping between keys and columns






    





GOID TERM DEFINITION

	GO:0090114                                                                                                                                                                                                                                                                                                       COPII-coated vesicle budding                                                                                                                                                                                                                                                                                     The evagination of an endoplasmic reticulum membrane, resulting in formation of a COPII-coated vesicle.                                                                                                                                                                                                          
	GO:0031146                                                                                                                                                                                                                                                                                                       SCF-dependent proteasomal ubiquitin-dependent protein catabolic process                                                                                                                                                                                                                                          The chemical reactions and pathways resulting in the breakdown of a protein or peptide by hydrolysis of its peptide bonds, initiated by the covalent attachment of ubiquitin, with ubiquitin-protein ligation catalyzed by an SCF (Skp1/Cul1/F-box protein) complex, and mediated by the proteasome.             
	GO:0009132                                                                                                                                                                                                                                                                                                       nucleoside diphosphate metabolic process                                                                                                                                                                                                                                                                         The chemical reactions and pathways involving a nucleoside diphosphate, a compound consisting of a nucleobase linked to a deoxyribose or ribose sugar esterified with diphosphate on the sugar.                                                                                                                  
	GO:0044038                                                                                                                                                                                                                                                                                                       cell wall macromolecule biosynthetic process                                                                                                                                                                                                                                                                     The chemical reactions and pathways resulting in the formation of a macromolecule destined to form part of a cell wall.                                                                                                                                                                                          
	GO:0043467                                                                                                                                                                                                                                                                                                       regulation of generation of precursor metabolites and energy                                                                                                                                                                                                                                                     Any process that modulates the frequency, rate or extent of the chemical reactions and pathways resulting in the formation of precursor metabolites, substances from which energy is derived, and the processes involved in the liberation of energy from these substances.                                      
	GO:0008033                                                                                                                                                                                                                                                                                                       tRNA processing                                                                                                                                                                                                                                                                                                  The process in which a pre-tRNA molecule is converted to a mature tRNA, ready for addition of an aminoacyl group.                                                                                                                                                                                                
	GO:0031113                                                                                                                                                                                                                                                                                                       regulation of microtubule polymerization                                                                                                                                                                                                                                                                         Any process that modulates the frequency, rate or extent of microtubule polymerization.                                                                                                                                                                                                                          
	GO:0051123                                                                                                                                                                                                                                                                                                       RNA polymerase II transcriptional preinitiation complex assembly                                                                                                                                                                                                                                                 The aggregation, arrangement and bonding together of proteins on an RNA polymerase II promoter DNA to form the transcriptional preinitiation complex (PIC), the formation of which is a prerequisite for transcription by RNA polymerase.                                                                        
	GO:0006896                                                                                                                                                                                                                                                                                                       Golgi to vacuole transport                                                                                                                                                                                                                                                                                       The directed movement of substances from the Golgi to the vacuole.                                                                                                                                                                                                                                               
	GO:0016973                                                                                                                                                                                                                                                                                                       poly(A)+ mRNA export from nucleus                                                                                                                                                                                                                                                                                The directed movement of poly(A)+ mRNA out of the nucleus into the cytoplasm.                                                                                                                                                                                                                                    
	GO:0006576                                                                                                                                                                                                                                                                                                       cellular biogenic amine metabolic process                                                                                                                                                                                                                                                                        The chemical reactions and pathways occurring at the level of individual cells involving any of a group of naturally occurring, biologically active amines, such as norepinephrine, histamine, and serotonin, many of which act as neurotransmitters.                                                            
	GO:0002181                                                                                                                                                                                                                                                                                                       cytoplasmic translation                                                                                                                                                                                                                                                                                          The chemical reactions and pathways resulting in the formation of a protein in the cytoplasm. This is a ribosome-mediated process in which the information in messenger RNA (mRNA) is used to specify the sequence of amino acids in the protein.                                                                
	GO:0040001                                                                                                                                                                                                                                                                                                       establishment of mitotic spindle localization                                                                                                                                                                                                                                                                    The cell cycle process in which the directed movement of the mitotic spindle to a specific location in the cell occurs.                                                                                                                                                                                          
	GO:0016558                                                                                                                                                                                                                                                                                                       protein import into peroxisome matrix                                                                                                                                                                                                                                                                            The import of proteins into the peroxisomal matrix. A peroxisome targeting signal (PTS) binds to a soluble receptor protein in the cytosol, and the resulting complex then binds to a receptor protein in the peroxisome membrane and is imported. The cargo protein is then released into the peroxisome matrix.
	GO:0006999                                                                                                                                                                                                                                                                                                       nuclear pore organization                                                                                                                                                                                                                                                                                        A process that is carried out at the cellular level which results in the assembly, arrangement of constituent parts, or disassembly of the nuclear pore.                                                                                                                                                         
	GO:0007096                                                                                                                                                                                                                                                                                                       regulation of exit from mitosis                                                                                                                                                                                                                                                                                  Any process involved in the progression from anaphase/telophase to G1 that is associated with a conversion from high to low mitotic CDK activity.                                                                                                                                                                
	GO:1904669                                                                                                                                                                                                                                                                                                       ATP export                                                                                                                                                                                                                                                                                                       The directed movement of ATP out of a cell or organelle.



In [18]:

    
# Wang semantic similarity of the IC-based representative terms against
# themselves. combine = NULL is passed so no combining strategy is applied;
# the preview printed in the next cell is a term-by-term matrix.
simsGOIC <- mgoSim(
    representativeTermsIC,
    representativeTermsIC,
    semData = scGO,
    measure = "Wang",
    combine = NULL
)



In [19]:

    
# Preview the first rows of the IC-based term similarity matrix.
head(simsGOIC)









    





GO:0090114 GO:0031146 GO:0009132 GO:0044038 GO:0043467 GO:0008033 GO:0031113 GO:0051123 GO:0006896 GO:0016973 GO:0006576 GO:0002181 GO:0040001 GO:0016558 GO:0006999 GO:0007096 GO:1904669

	GO:0090114 1.000 0.045 0.126 0.083 0.070 0.050 0.187 0.086 0.551 0.392 0.068 0.048 0.428 0.496 0.197 0.193 0.214
	GO:0031146 0.045 1.000 0.186 0.241 0.161 0.226 0.037 0.154 0.024 0.092 0.196 0.296 0.040 0.032 0.072 0.035 0.020
	GO:0009132 0.126 0.186 1.000 0.193 0.192 0.404 0.102 0.294 0.065 0.112 0.350 0.247 0.119 0.097 0.089 0.106 0.056
	GO:0044038 0.083 0.241 0.193 1.000 0.205 0.270 0.106 0.399 0.029 0.114 0.250 0.462 0.080 0.064 0.141 0.071 0.025
	GO:0043467 0.070 0.161 0.192 0.205 1.000 0.185 0.181 0.126 0.039 0.063 0.274 0.162 0.068 0.053 0.128 0.176 0.033
	GO:0008033 0.050 0.226 0.404 0.270 0.185 1.000 0.042 0.383 0.026 0.130 0.328 0.328 0.046 0.037 0.083 0.040 0.021



In [20]:

    
# Preview the first rows of shortest.path (defined outside this section).
# NOTE(review): presumably the pairwise shortest-path distances that the
# similarity matrix above is compared against -- confirm.
head(shortest.path)









    





V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20

	0 1 1 1 1 1 2 1 2 2 2 2 3 3 2 3 2 3 3 4
	1 0 1 1 2 2 1 1 1 1 1 1 2 2 1 2 1 2 2 3
	1 1 0 2 2 1 1 2 2 2 2 2 3 3 2 2 2 3 3 4
	1 1 2 0 1 2 2 2 2 2 2 2 3 3 2 3 2 3 3 4
	1 2 2 1 0 2 3 2 3 3 3 3 4 4 3 4 3 4 4 5
	1 2 1 2 2 0 2 2 3 3 3 3 4 4 3 3 3 4 4 5



In [148]:

    
# Pairwise similarity between the gene clusters in g, using the Wang measure
# with "BMA" as the combining strategy.
wangClusterSim <- mclusterSim(
    g,
    semData = scGO,
    measure = "Wang",
    combine = "BMA"
)



In [149]:

    
# Preview the first rows of the cluster-by-cluster similarity matrix.
head(wangClusterSim)









    






	1.000 0.527 0.544 0.491 0.534 0.491 0.513 0.495 0.448 0.505 0.375 0.632 0.531
	0.527 1.000 0.610 0.568 0.520 0.534 0.523 0.507 0.548 0.540 0.455 0.584 0.494
	0.544 0.610 1.000 0.625 0.523 0.608 0.651 0.606 0.634 0.602 0.358 0.604 0.391
	0.491 0.568 0.625 1.000 0.514 0.558 0.609 0.552 0.623 0.592 0.416 0.521 0.383
	0.534 0.520 0.523 0.514 1.000 0.496 0.514 0.522 0.464 0.585 0.409 0.554 0.453
	0.491 0.534 0.608 0.558 0.496 1.000 0.557 0.540 0.539 0.549 0.354 0.591 0.453



In [150]:

    
# Preview the first rows of shortest.path (defined outside this section).
# NOTE(review): presumably the pairwise shortest-path distances that the
# similarity matrix above is compared against -- confirm.
head(shortest.path)









    





V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13

	0 1 1 1 2 2 1 1 2 2 1 1 1
	1 0 1 2 1 1 2 2 3 1 2 2 2
	1 1 0 1 1 2 2 2 3 2 2 2 2
	1 2 1 0 2 3 1 2 2 3 2 2 2
	2 1 1 2 0 1 3 3 4 2 3 3 3
	2 1 2 3 1 0 3 3 4 1 3 3 3



In [156]:

    
# Community-by-community GO similarity: entry [i, j] is the Wang/BMA score
# between the GO ids attached (as names) to gos[[i]] and gos[[j]].
# The matrix starts out explicitly filled with NA so that any cell mgoSim()
# cannot score stays NA.
goSims <- matrix(NA_real_, nrow = numCom, ncol = numCom)

# seq_len() keeps both loops from running when numCom is 0 (1:0 would
# iterate over 1 and 0).
for (i in seq_len(numCom)) {
    for (j in seq_len(numCom)) {
        goSims[i, j] <- mgoSim(
            names(gos[[i]]),
            names(gos[[j]]),
            measure = "Wang",
            semData = scGO,
            combine = "BMA"
        )
    }
}



In [157]:

    
# Preview the first rows of the pairwise GO-set similarity matrix.
head(goSims)









    






	1.000 0.164 0.093 0.140 0.602 0.145 0.221 0.338 0.134 0.276 NA   0.525 0.626
	0.164 1.000 0.633 0.475 0.171 0.348 0.363 0.322 0.672 0.234 NA   0.362 0.082
	0.093 0.633 1.000 0.343 0.085 0.286 0.411 0.201 0.563 0.200 NA   0.246 0.073
	0.140 0.475 0.343 1.000 0.132 0.321 0.236 0.226 1.000 0.276 NA   0.331 0.094
	0.602 0.171 0.085 0.132 1.000 0.182 0.298 0.511 0.134 0.234 NA   0.447 0.537
	0.145 0.348 0.286 0.321 0.182 1.000 0.285 0.278 0.385 0.286 NA   0.368 0.137



In [18]:

    
# Wang/BMA similarity for every ordered pair of names in `enrichedGOs`.
# The outer sapply() produces one column per name and the inner sapply() one
# row per name, so the result normally simplifies to a square named matrix.
wangGoSims <- sapply(
    names(enrichedGOs),
    function(colTerm) {
        sapply(
            names(enrichedGOs),
            function(rowTerm) {
                mgoSim(colTerm, rowTerm, semData = scGO,
                       measure = "Wang", combine = "BMA")
            }
        )
    }
)



In [19]:

    
# Display the full pairwise similarity result computed above.
wangGoSims



In [22]:

    
# Pairwise Wang/BMA semantic similarity among the genes of g[[1]].
# The elements of g[[1]] are coerced to integer and used as positions into
# `allGeneNames` to obtain the gene identifiers.
mgeneSim(
    allGeneNames[as.integer(g[[1]])],
    semData = scGO,
    measure = "Wang",
    combine = "BMA"
)









    



  |======================================================================| 100%






    





YML028W YGL122C YPL214C YHL006C YKL130C YCR011C YBL007C YJR091C YGL145W YBR133C YDR214W YGR268C YLR291C YOR138C YEL023C YFR002W

	YML028W 1.000 0.218 0.330 0.389 0.193 0.385 0.183 0.150 0.121 0.324 0.547 0.477 0.146 0.157 0.477 0.152
	YGL122C 0.218 1.000 0.270 0.318 0.119 0.340 0.085 0.332 0.451 0.351 0.098 0.185 0.295 0.223 0.185 0.326
	YPL214C 0.330 0.270 1.000 0.338 0.188 0.171 0.131 0.212 0.053 0.319 0.106 0.069 0.396 0.123 0.069 0.080
	YHL006C 0.389 0.318 0.338 1.000 0.208 0.234 0.264 0.263 0.078 0.310 0.235 0.105 0.230 0.249 0.105 0.196
	YKL130C 0.193 0.119 0.188 0.208 1.000 0.245 0.176 0.298 0.203 0.152 0.142 0.243 0.118 0.091 0.243 0.156
	YCR011C 0.385 0.340 0.171 0.234 0.245 1.000 0.391 0.242 0.512 0.373 0.284 0.196 0.161 0.112 0.196 0.350
	YBL007C 0.183 0.085 0.131 0.264 0.176 0.391 1.000 0.149 0.453 0.167 0.158 0.283 0.079 0.062 0.283 0.328
	YJR091C 0.150 0.332 0.212 0.263 0.298 0.242 0.149 1.000 0.344 0.210 0.118 0.200 0.115 0.140 0.200 0.221
	YGL145W 0.121 0.451 0.053 0.078 0.203 0.512 0.453 0.344 1.000 0.117 0.095 0.158 0.045 0.035 0.158 0.451
	YBR133C 0.324 0.351 0.319 0.310 0.152 0.373 0.167 0.210 0.117 1.000 0.192 0.217 0.294 0.330 0.217 0.132
	YDR214W 0.547 0.098 0.106 0.235 0.142 0.284 0.158 0.118 0.095 0.192 1.000 0.477 0.099 0.144 0.477 0.145
	YGR268C 0.477 0.185 0.069 0.105 0.243 0.196 0.283 0.200 0.158 0.217 0.477 1.000 0.077 0.144 1.000 0.203
	YLR291C 0.146 0.295 0.396 0.230 0.118 0.161 0.079 0.115 0.045 0.294 0.099 0.077 1.000 0.268 0.077 0.064
	YOR138C 0.157 0.223 0.123 0.249 0.091 0.112 0.062 0.140 0.035 0.330 0.144 0.144 0.268 1.000 0.144 0.070
	YEL023C 0.477 0.185 0.069 0.105 0.243 0.196 0.283 0.200 0.158 0.217 0.477 1.000 0.077 0.144 1.000 0.203
	YFR002W 0.152 0.326 0.080 0.196 0.156 0.350 0.328 0.221 0.451 0.132 0.145 0.203 0.064 0.070 0.203 1.000



In [21]:

    
# Wang/BMA similarity between the GO-term sets of the first two entries of
# `enrichedGOs`; the GO IDs are taken from the names of each entry.
mgoSim(
    names(enrichedGOs[[1]]),
    names(enrichedGOs[[2]]),
    semData = scGO,
    measure = "Wang",
    combine = "BMA"
)



In [15]:

    
# Preview the first rows of the current `shortest.path` table
# (head() shows six rows by default).
head(shortest.path)









    





V1 V2 V3 V4 V5 V6

	0 1 1 1 1 1
	1 0 1 2 2 2
	1 1 0 1 1 1
	1 2 1 0 2 2
	1 2 1 2 0 2
	1 2 1 2 2 0



In [116]:

    
# For every unordered pair (c1 < c2) of entries in `enrichedGOsPathway`,
# collect simsPathway[c1, c2] into `semSims` and shortest.path[c1, c2] into
# `distances`. Both vectors use the same pair order, so element k of one
# corresponds to element k of the other.
#
# The vectors are sized from the same count that drives the loops, so they
# cannot be left zero-padded or silently grown if `numCom` disagrees with
# length(enrichedGOsPathway).
numPathway <- length(enrichedGOsPathway)
numPairs <- (numPathway * (numPathway - 1)) / 2

distances <- numeric(length = numPairs)
semSims <- numeric(length = numPairs)

completed <- 0

# seq_len() gives an empty sequence for zero entries, where 1:0 would not.
for (c1 in seq_len(numPathway)) {

    for (c2 in seq_len(numPathway)) {

        # Strict upper triangle only: skip the diagonal and mirrored pairs.
        if (c2 <= c1) next

        completed <- completed + 1
        semSims[completed] <- simsPathway[c1, c2]

        distances[completed] <- shortest.path[c1, c2]

        print(sprintf("Completed: %s", completed))
    }
}









    



[1] "Completed: 1"
[1] "Completed: 2"
[1] "Completed: 3"
[1] "Completed: 4"
[1] "Completed: 5"
[1] "Completed: 6"
[1] "Completed: 7"
[1] "Completed: 8"
[1] "Completed: 9"
[1] "Completed: 10"
[1] "Completed: 11"
[1] "Completed: 12"
[1] "Completed: 13"
[1] "Completed: 14"
[1] "Completed: 15"
[1] "Completed: 16"
[1] "Completed: 17"
[1] "Completed: 18"
[1] "Completed: 19"
[1] "Completed: 20"
[1] "Completed: 21"
[1] "Completed: 22"
[1] "Completed: 23"
[1] "Completed: 24"
[1] "Completed: 25"
[1] "Completed: 26"
[1] "Completed: 27"
[1] "Completed: 28"
[1] "Completed: 29"
[1] "Completed: 30"
[1] "Completed: 31"
[1] "Completed: 32"
[1] "Completed: 33"
[1] "Completed: 34"
[1] "Completed: 35"
[1] "Completed: 36"
[1] "Completed: 37"
[1] "Completed: 38"
[1] "Completed: 39"
[1] "Completed: 40"
[1] "Completed: 41"
[1] "Completed: 42"
[1] "Completed: 43"
[1] "Completed: 44"
[1] "Completed: 45"
[1] "Completed: 46"
[1] "Completed: 47"
[1] "Completed: 48"
[1] "Completed: 49"
[1] "Completed: 50"
[1] "Completed: 51"
[1] "Completed: 52"
[1] "Completed: 53"
[1] "Completed: 54"
[1] "Completed: 55"
[1] "Completed: 56"
[1] "Completed: 57"
[1] "Completed: 58"
[1] "Completed: 59"
[1] "Completed: 60"
[1] "Completed: 61"
[1] "Completed: 62"
[1] "Completed: 63"
[1] "Completed: 64"
[1] "Completed: 65"
[1] "Completed: 66"
[1] "Completed: 67"
[1] "Completed: 68"
[1] "Completed: 69"
[1] "Completed: 70"
[1] "Completed: 71"
[1] "Completed: 72"
[1] "Completed: 73"
[1] "Completed: 74"
[1] "Completed: 75"
[1] "Completed: 76"
[1] "Completed: 77"
[1] "Completed: 78"
[1] "Completed: 79"
[1] "Completed: 80"
[1] "Completed: 81"
[1] "Completed: 82"
[1] "Completed: 83"
[1] "Completed: 84"
[1] "Completed: 85"
[1] "Completed: 86"
[1] "Completed: 87"
[1] "Completed: 88"
[1] "Completed: 89"
[1] "Completed: 90"
[1] "Completed: 91"
[1] "Completed: 92"
[1] "Completed: 93"
[1] "Completed: 94"
[1] "Completed: 95"
[1] "Completed: 96"
[1] "Completed: 97"
[1] "Completed: 98"
[1] "Completed: 99"
[1] "Completed: 100"
[1] "Completed: 101"
[1] "Completed: 102"
[1] "Completed: 103"
[1] "Completed: 104"
[1] "Completed: 105"
[1] "Completed: 106"
[1] "Completed: 107"
[1] "Completed: 108"
[1] "Completed: 109"
[1] "Completed: 110"
[1] "Completed: 111"
[1] "Completed: 112"
[1] "Completed: 113"
[1] "Completed: 114"
[1] "Completed: 115"
[1] "Completed: 116"
[1] "Completed: 117"
[1] "Completed: 118"
[1] "Completed: 119"
[1] "Completed: 120"
[1] "Completed: 121"
[1] "Completed: 122"
[1] "Completed: 123"
[1] "Completed: 124"
[1] "Completed: 125"
[1] "Completed: 126"
[1] "Completed: 127"
[1] "Completed: 128"
[1] "Completed: 129"
[1] "Completed: 130"
[1] "Completed: 131"
[1] "Completed: 132"
[1] "Completed: 133"
[1] "Completed: 134"
[1] "Completed: 135"
[1] "Completed: 136"
[1] "Completed: 137"
[1] "Completed: 138"
[1] "Completed: 139"
[1] "Completed: 140"
[1] "Completed: 141"
[1] "Completed: 142"
[1] "Completed: 143"
[1] "Completed: 144"
[1] "Completed: 145"
[1] "Completed: 146"
[1] "Completed: 147"
[1] "Completed: 148"
[1] "Completed: 149"
[1] "Completed: 150"
[1] "Completed: 151"
[1] "Completed: 152"
[1] "Completed: 153"
[1] "Completed: 154"
[1] "Completed: 155"
[1] "Completed: 156"
[1] "Completed: 157"
[1] "Completed: 158"
[1] "Completed: 159"
[1] "Completed: 160"
[1] "Completed: 161"
[1] "Completed: 162"
[1] "Completed: 163"
[1] "Completed: 164"
[1] "Completed: 165"
[1] "Completed: 166"
[1] "Completed: 167"
[1] "Completed: 168"
[1] "Completed: 169"
[1] "Completed: 170"
[1] "Completed: 171"
[1] "Completed: 172"
[1] "Completed: 173"
[1] "Completed: 174"
[1] "Completed: 175"
[1] "Completed: 176"
[1] "Completed: 177"
[1] "Completed: 178"
[1] "Completed: 179"
[1] "Completed: 180"
[1] "Completed: 181"
[1] "Completed: 182"
[1] "Completed: 183"
[1] "Completed: 184"
[1] "Completed: 185"
[1] "Completed: 186"
[1] "Completed: 187"
[1] "Completed: 188"
[1] "Completed: 189"
[1] "Completed: 190"
[1] "Completed: 191"
[1] "Completed: 192"
[1] "Completed: 193"
[1] "Completed: 194"
[1] "Completed: 195"
[1] "Completed: 196"
[1] "Completed: 197"
[1] "Completed: 198"
[1] "Completed: 199"
[1] "Completed: 200"
[1] "Completed: 201"
[1] "Completed: 202"
[1] "Completed: 203"
[1] "Completed: 204"
[1] "Completed: 205"
[1] "Completed: 206"
[1] "Completed: 207"
[1] "Completed: 208"
[1] "Completed: 209"
[1] "Completed: 210"
[1] "Completed: 211"
[1] "Completed: 212"
[1] "Completed: 213"
[1] "Completed: 214"
[1] "Completed: 215"
[1] "Completed: 216"
[1] "Completed: 217"
[1] "Completed: 218"
[1] "Completed: 219"
[1] "Completed: 220"
[1] "Completed: 221"
[1] "Completed: 222"
[1] "Completed: 223"
[1] "Completed: 224"
[1] "Completed: 225"
[1] "Completed: 226"
[1] "Completed: 227"
[1] "Completed: 228"
[1] "Completed: 229"
[1] "Completed: 230"
[1] "Completed: 231"
[1] "Completed: 232"
[1] "Completed: 233"
[1] "Completed: 234"
[1] "Completed: 235"
[1] "Completed: 236"
[1] "Completed: 237"
[1] "Completed: 238"
[1] "Completed: 239"
[1] "Completed: 240"
[1] "Completed: 241"
[1] "Completed: 242"
[1] "Completed: 243"
[1] "Completed: 244"
[1] "Completed: 245"
[1] "Completed: 246"
[1] "Completed: 247"
[1] "Completed: 248"
[1] "Completed: 249"
[1] "Completed: 250"
[1] "Completed: 251"
[1] "Completed: 252"
[1] "Completed: 253"



In [117]:

    
# Scatter plot of the per-pair values: `distances` on the x axis against
# `semSims` on the y axis.
plot(
    x = distances,
    y = semSims,
    xlab = "Distance on Map",
    ylab = "Shared Paths"
)



In [102]:

    
# Spearman rank correlation between the per-pair distances and similarities.
cor(distances, semSims, method="spearman")









    




-0.05678168738453



In [42]:

    
# Configure GOSim and run a GO enrichment for one cluster.
library(GOSim)
# Select the ontology named by `ont` (defined elsewhere in the notebook).
# NOTE(review): loadIC=FALSE presumably skips loading information-content
# data -- confirm against the GOSim documentation.
setOntology(ont, loadIC=FALSE)
# Keep annotations of all evidence levels and use the yeast (org.Sc.sgd)
# organism name and gene-to-GO map.
setEvidenceLevel(evidences="all",organism=org.Sc.sgdORGANISM, gomap=org.Sc.sgdGO)
# Enrichment of the genes in g[[46]] against `allGenes`; the result `e` is a
# list with components GOTerms, p.values and genes.
e <- GOenrichment(g[[46]], allGenes)









    



-> retrieving GO information for all available genes for organism 'Saccharomyces cerevisiae' in GO database
-> filtering GO terms according to evidence levels 'all'

Building most specific GOs .....
	( 1690 GO terms found. )

Build GO DAG topology ..........
	( 3645 GO terms and 8243 relations. )

Annotating nodes ...............
	( 1567 genes annotated to the GO terms. )

			 -- Elim Algorithm -- 

		 the algorithm is scoring 172 nontrivial nodes
		 parameters: 
			 test statistic: fisher
			 cutOff: 0.01

	 Level 14:	1 nodes to be scored	(0 eliminated genes)

	 Level 13:	3 nodes to be scored	(0 eliminated genes)

	 Level 12:	7 nodes to be scored	(0 eliminated genes)

	 Level 11:	9 nodes to be scored	(0 eliminated genes)

	 Level 10:	8 nodes to be scored	(1 eliminated genes)

	 Level 9:	17 nodes to be scored	(3 eliminated genes)

	 Level 8:	14 nodes to be scored	(8 eliminated genes)

	 Level 7:	20 nodes to be scored	(8 eliminated genes)

	 Level 6:	25 nodes to be scored	(8 eliminated genes)

	 Level 5:	28 nodes to be scored	(8 eliminated genes)

	 Level 4:	20 nodes to be scored	(28 eliminated genes)

	 Level 3:	12 nodes to be scored	(28 eliminated genes)

	 Level 2:	7 nodes to be scored	(28 eliminated genes)

	 Level 1:	1 nodes to be scored	(28 eliminated genes)



In [49]:

    
# Display the enrichment result (components: GOTerms, p.values, genes).
e









    





	$GOTerms
		
go_id Term Definition

	15591 GO:0018343                                                                                                 protein farnesylation                                                                                      The covalent attachment of a farnesyl group to a protein.                                                  
	15594 GO:0018344                                                                                                 protein geranylgeranylation                                                                                The covalent attachment of a geranylgeranyl group to a protein.                                            
	16626 GO:0006874                                                                                                 cellular calcium ion homeostasis                                                                           Any process involved in the maintenance of an internal steady state of calcium ions at the level of a cell.
	17047 GO:0030010                                                                                                 establishment of cell polarity                                                                             The specification and formation of anisotropic intracellular organization or cell growth patterns.         
	48636 GO:0042127                                                                                                 regulation of cell proliferation                                                                           Any process that modulates the frequency, rate or extent of cell proliferation.                            
	79331 GO:0070884                                                                                                 regulation of calcineurin-NFAT signaling cascade                                                           Any process that modulates the frequency, rate or extent of the calcineurin-NFAT signaling cascade.        



	$p.values
		
	GO:0006874
		0.00891715384596614
	GO:0042127
		0.00446713465220172
	GO:0070884
		0.00446713465220172
	GO:0018343
		1.71154584375543e-05
	GO:0018344
		0.000170063041559058
	GO:0030010
		0.00345128149568176


	$genes
		
	$`GO:0006874`
		
	'YBR187W'
	'YGL155W'


	$`GO:0042127`
		'YDL090C'
	$`GO:0070884`
		'YKL159C'
	$`GO:0018343`
		
	'YDL090C'
	'YKL019W'


	$`GO:0018344`
		
	'YGL155W'
	'YJL031C'
	'YKL019W'
	'YOR370C'
	'YPR176C'


	$`GO:0030010`
		
	'YCR063W'
	'YER093C'
	'YER118C'
	'YER149C'
	'YFL039C'
	'YGL054C'
	'YGL155W'
	'YGR014W'
	'YGR058W'
	'YGR262C'
	'YHR115C'
	'YHR129C'
	'YIL144W'
	'YLL049W'
	'YLR319C'
	'YMR294W'
	'YNL116W'
	'YOR127W'
	'YOR301W'
	'YPL161C'
	'YPL174C'



In [88]:

    
# Pull the term table and the named p-value vector out of the enrichment
# result `e`, using exact-name extraction.
goTerms <- e[["GOTerms"]]
p.values <- e[["p.values"]]



In [89]:

    
# Turn the named p-value vector into a data frame and copy the GO IDs (the
# vector's names) into an explicit `go_id` column so it can be used as a
# join key.
p.values.df <- data.frame(p.values)
p.values.df$go_id <- names(p.values)
p.values.df









    





p.values go_id

	GO:0006874 8.917154e-03 GO:0006874  
	GO:0042127 4.467135e-03 GO:0042127  
	GO:0070884 4.467135e-03 GO:0070884  
	GO:0018343 1.711546e-05 GO:0018343  
	GO:0018344 1.700630e-04 GO:0018344  
	GO:0030010 3.451281e-03 GO:0030010



In [90]:

    
# Join the term table with the p-values on `go_id`. merge() defaults to an
# inner join, so only GO IDs present in both inputs are kept.
goTerms <- merge(goTerms, p.values.df, by="go_id")



In [93]:

    
# Rename the four columns of the merged table positionally, then preview it.
# This relies on the column order being go_id, Term, Definition, p.values.
names(goTerms) <- c("GO_ID", "TERM", "DEFINITION", "P_VALUE")
head(goTerms)









    





GO_ID TERM DEFINITION P_VALUE

	GO:0006874                                                                                                 cellular calcium ion homeostasis                                                                           Any process involved in the maintenance of an internal steady state of calcium ions at the level of a cell. 8.917154e-03                                                                                               
	GO:0018343                                                                                                 protein farnesylation                                                                                      The covalent attachment of a farnesyl group to a protein.                                                  1.711546e-05                                                                                               
	GO:0018344                                                                                                 protein geranylgeranylation                                                                                The covalent attachment of a geranylgeranyl group to a protein.                                            1.700630e-04                                                                                               
	GO:0030010                                                                                                 establishment of cell polarity                                                                             The specification and formation of anisotropic intracellular organization or cell growth patterns.         3.451281e-03                                                                                               
	GO:0042127                                                                                                 regulation of cell proliferation                                                                           Any process that modulates the frequency, rate or extent of cell proliferation.                            4.467135e-03                                                                                               
	GO:0070884                                                                                                 regulation of calcineurin-NFAT signaling cascade                                                           Any process that modulates the frequency, rate or extent of the calcineurin-NFAT signaling cascade.        4.467135e-03



In [94]:

    
# Render the ID, term and p-value columns as a graphical table
# (the DEFINITION column is left out).
library(gridExtra)
grid.table(goTerms[,c("GO_ID", "TERM", "P_VALUE")])



In [46]:

    
# Show the members of element 46 of `g` (the gene set passed to GOenrichment).
g[[46]]









    





	'YKL019W'
	'YGL155W'
	'YKL159C'
	'YCR063W'
	'YBR247C'
	'YDL090C'
	'YOL135C'



In [103]:

    
# Fetch the GO annotation entries recorded for ORF YKL019W in the
# org.Sc.sgdGO map, as a plain list (one element per annotation).
l <- as.list(org.Sc.sgdGO[["YKL019W"]])



In [116]:

    
# Extract the GOID field of every annotation entry in `l`.
# vapply() with character(1) always returns a character vector (names kept),
# including character(0) for an empty `l`, where sapply() would return list().
gos <- vapply(l, function(entry) entry[["GOID"]], character(1))



In [122]:

    
# Look up the GOID, TERM and ONTOLOGY columns in GO.db for the GO IDs in `gos`.
# NOTE(review): the result is stored in `t`, which is also the name of base
# R's transpose function; calls to t() still resolve to the function, but a
# more descriptive name would be clearer.
t <- select(GO.db, keys=gos, columns=c("GOID","TERM","ONTOLOGY"))









    



'select()' returned many:1 mapping between keys and columns



In [123]:

    
# Render the GO.db lookup result `t` as a graphical table.
grid.table(t)



In [ ]:

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V25	V26	V27	V28	V29	V30	V31	V32	V33	V34
0	1	2	2	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	3
1	0	1	1	1	1	1	1	1	2	⋯	2	2	2	3	4	4	2	3	1	2
2	1	0	1	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	1
2	1	1	0	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	1
2	1	2	2	0	1	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	3
2	1	2	2	1	0	1	2	2	3	⋯	2	3	3	3	4	4	3	4	2	3

	GO:0032049	GO:0006809	GO:0016226	GO:0045429	GO:0055114	GO:1901300	GO:0001402	GO:0006970	GO:0006972	GO:0007232	⋯	GO:0015918	GO:0035376	GO:0035690	GO:0033617	GO:0016050	GO:0016485	GO:0030433	GO:0031503	GO:0007097	GO:0031578
GO:0000077	0.121	0.094	0.121	0.050	0.161	0.191	0.311	0.282	0.242	0.371	⋯	0.094	0.087	0.289	0.072	0.144	0.049	0.280	0.067	0.049	0.112
GO:0001302	0.170	0.121	0.160	0.062	0.230	0.114	0.274	0.100	0.087	0.220	⋯	0.131	0.124	0.171	0.094	0.198	0.066	0.175	0.093	0.065	0.069
GO:0006457	0.129	0.198	0.276	0.093	0.191	0.079	0.211	0.186	0.163	0.172	⋯	0.100	0.098	0.305	0.156	0.374	0.119	0.120	0.181	0.117	0.045
GO:0033194	0.036	0.060	0.084	0.028	0.110	0.211	0.129	0.411	0.355	0.177	⋯	0.061	0.058	0.309	0.048	0.094	0.070	0.174	0.100	0.069	0.014
GO:0034599	0.073	0.111	0.143	0.059	0.089	0.326	0.250	0.334	0.290	0.351	⋯	0.053	0.050	0.582	0.087	0.173	0.059	0.296	0.081	0.058	0.030
GO:0042262	0.249	0.229	0.141	0.143	0.254	0.178	0.278	0.176	0.155	0.326	⋯	0.079	0.074	0.192	0.058	0.094	0.197	0.422	0.046	0.037	0.126
GO:0045454	0.160	0.114	0.147	0.176	0.198	0.206	0.478	0.089	0.078	0.395	⋯	0.119	0.112	0.156	0.090	0.176	0.061	0.166	0.082	0.060	0.125
GO:0061077	0.106	0.166	0.229	0.079	0.156	0.067	0.176	0.153	0.133	0.142	⋯	0.083	0.080	0.251	0.128	0.304	0.097	0.101	0.146	0.095	0.038
GO:0006378	0.151	0.243	0.143	0.137	0.117	0.035	0.073	0.052	0.044	0.059	⋯	0.032	0.029	0.091	0.054	0.100	0.259	0.197	0.046	0.034	0.082
GO:0016973	0.081	0.062	0.081	0.035	0.147	0.033	0.068	0.044	0.038	0.053	⋯	0.348	0.317	0.036	0.025	0.039	0.175	0.105	0.134	0.201	0.046
GO:0043488	0.085	0.100	0.141	0.137	0.175	0.067	0.166	0.089	0.077	0.135	⋯	0.053	0.050	0.069	0.043	0.078	0.279	0.106	0.082	0.059	0.085
GO:0045945	0.138	0.223	0.113	0.432	0.060	0.091	0.111	0.027	0.023	0.092	⋯	0.018	0.016	0.047	0.030	0.050	0.133	0.112	0.023	0.019	0.102
GO:1900152	0.085	0.138	0.076	0.195	0.059	0.061	0.116	0.026	0.022	0.096	⋯	0.017	0.015	0.048	0.030	0.050	0.109	0.196	0.023	0.018	0.226
GO:1900364	0.095	0.155	0.088	0.198	0.069	0.060	0.122	0.030	0.026	0.099	⋯	0.019	0.017	0.055	0.033	0.058	0.159	0.127	0.026	0.020	0.145
GO:0009228	0.265	0.322	0.158	0.198	0.180	0.070	0.127	0.038	0.033	0.105	⋯	0.058	0.053	0.071	0.046	0.075	0.085	0.170	0.034	0.027	0.071
GO:0000725	0.258	0.245	0.155	0.144	0.277	0.167	0.249	0.182	0.157	0.288	⋯	0.081	0.075	0.201	0.063	0.108	0.208	0.412	0.049	0.039	0.121
GO:0000730	0.171	0.196	0.309	0.109	0.220	0.115	0.188	0.156	0.129	0.206	⋯	0.061	0.055	0.167	0.309	0.210	0.157	0.281	0.043	0.033	0.096
GO:0043007	0.257	0.249	0.245	0.144	0.291	0.086	0.170	0.057	0.050	0.140	⋯	0.084	0.079	0.098	0.163	0.303	0.217	0.308	0.052	0.040	0.190
GO:0007533	0.201	0.137	0.172	0.079	0.342	0.089	0.180	0.067	0.059	0.148	⋯	0.100	0.095	0.104	0.064	0.114	0.110	0.207	0.062	0.048	0.072
GO:0008298	0.036	0.060	0.084	0.028	0.110	0.024	0.060	0.108	0.093	0.048	⋯	0.215	0.205	0.081	0.048	0.094	0.070	0.036	0.383	0.252	0.014
GO:0055085	0.186	0.126	0.162	0.067	0.224	0.127	0.285	0.098	0.086	0.235	⋯	0.530	0.501	0.173	0.100	0.196	0.068	0.188	0.193	0.222	0.078
GO:0000147	0.133	0.090	0.378	0.052	0.149	0.098	0.202	0.065	0.057	0.166	⋯	0.095	0.088	0.118	0.302	0.377	0.046	0.142	0.060	0.046	0.124
GO:0006897	0.034	0.058	0.086	0.026	0.115	0.022	0.059	0.113	0.096	0.046	⋯	0.316	0.299	0.082	0.046	0.097	0.070	0.033	0.235	0.262	0.012
GO:0030041	0.083	0.068	0.277	0.037	0.113	0.065	0.141	0.052	0.044	0.109	⋯	0.066	0.059	0.090	0.540	0.298	0.034	0.097	0.046	0.034	0.082
GO:0000288	0.125	0.216	0.126	0.119	0.105	0.029	0.062	0.046	0.038	0.049	⋯	0.027	0.024	0.080	0.044	0.087	0.183	0.285	0.040	0.029	0.109
GO:0034629	0.030	0.051	0.071	0.024	0.091	0.021	0.051	0.089	0.076	0.040	⋯	0.176	0.166	0.067	0.041	0.077	0.058	0.030	0.672	0.206	0.012
GO:0051654	0.034	0.054	0.076	0.027	0.095	0.023	0.055	0.094	0.081	0.044	⋯	0.196	0.186	0.072	0.045	0.082	0.063	0.034	0.195	0.645	0.013
GO:0006890	0.052	0.042	0.059	0.020	0.164	0.038	0.095	0.072	0.061	0.071	⋯	0.413	0.381	0.055	0.034	0.062	0.047	0.057	0.146	0.305	0.022
GO:0000086	0.115	0.088	0.115	0.045	0.165	0.083	0.195	0.072	0.061	0.152	⋯	0.093	0.085	0.123	0.065	0.139	0.046	0.127	0.065	0.046	0.186
GO:0006355	0.170	0.267	0.136	0.354	0.074	0.075	0.142	0.033	0.028	0.119	⋯	0.022	0.020	0.058	0.037	0.061	0.164	0.136	0.029	0.023	0.135
GO:0019918	0.132	0.156	0.153	0.080	0.136	0.031	0.073	0.060	0.050	0.056	⋯	0.033	0.030	0.097	0.050	0.109	0.273	0.212	0.052	0.037	0.074
GO:0034969	0.198	0.131	0.189	0.080	0.220	0.070	0.132	0.043	0.037	0.107	⋯	0.064	0.059	0.075	0.149	0.231	0.210	0.288	0.039	0.030	0.167
GO:0044257	0.186	0.184	0.167	0.104	0.138	0.041	0.085	0.061	0.052	0.070	⋯	0.038	0.035	0.108	0.064	0.119	0.343	0.610	0.055	0.041	0.259
GO:0051726	0.164	0.125	0.164	0.186	0.221	0.211	0.516	0.102	0.089	0.421	⋯	0.127	0.119	0.176	0.098	0.202	0.068	0.170	0.095	0.067	0.179
GO:0034605	0.074	0.113	0.147	0.059	0.092	0.176	0.255	0.472	0.408	0.414	⋯	0.054	0.051	0.352	0.088	0.178	0.061	0.253	0.083	0.060	0.030
GO:0008150	0.091	0.142	0.213	0.061	0.329	0.052	0.148	0.318	0.282	0.120	⋯	0.161	0.162	0.223	0.116	0.289	0.199	0.080	0.323	0.193	0.030
GO:0006446	0.184	0.302	0.155	0.375	0.086	0.080	0.153	0.038	0.033	0.129	⋯	0.026	0.024	0.071	0.047	0.075	0.201	0.181	0.034	0.028	0.175
GO:0019509	0.266	0.242	0.165	0.144	0.187	0.055	0.111	0.039	0.033	0.087	⋯	0.051	0.045	0.070	0.042	0.075	0.129	0.174	0.034	0.026	0.066
GO:0016579	0.168	0.174	0.170	0.091	0.153	0.035	0.081	0.067	0.057	0.064	⋯	0.039	0.036	0.106	0.057	0.120	0.466	0.323	0.060	0.043	0.124
GO:1902499	0.099	0.111	0.101	0.247	0.082	0.101	0.142	0.036	0.030	0.113	⋯	0.022	0.019	0.064	0.036	0.069	0.182	0.167	0.031	0.023	0.131
GO:0000055	0.046	0.033	0.114	0.019	0.113	0.035	0.075	0.050	0.043	0.058	⋯	0.285	0.260	0.040	0.090	0.088	0.035	0.050	0.101	0.344	0.031
GO:0006606	0.042	0.029	0.036	0.018	0.091	0.035	0.065	0.040	0.035	0.052	⋯	0.332	0.306	0.033	0.025	0.035	0.030	0.047	0.185	0.193	0.023
GO:0006999	0.068	0.107	0.301	0.054	0.092	0.047	0.113	0.090	0.077	0.091	⋯	0.051	0.048	0.153	0.384	0.550	0.059	0.069	0.083	0.057	0.077
GO:0051292	0.048	0.075	0.306	0.041	0.057	0.036	0.078	0.057	0.048	0.063	⋯	0.034	0.031	0.099	0.620	0.342	0.038	0.051	0.051	0.037	0.061

1.000	0.603	0.638	0.574	0.654	0.627
0.603	1.000	0.715	0.641	0.701	0.748
0.638	0.715	1.000	0.660	0.726	0.718
0.574	0.641	0.660	1.000	0.642	0.626
0.654	0.701	0.726	0.642	1.000	0.676
0.627	0.748	0.718	0.626	0.676	1.000

1.000	0.763	0.843	0.824	0.859	0.763	0.836	0.772	0.843	0.524	0.579	0.743	0.801	0.727	0.790	0.791	0.788	0.628	0.871	0.665
0.763	1.000	0.769	0.707	0.719	0.742	0.718	0.745	0.822	0.584	0.727	0.804	0.856	0.649	0.631	0.653	0.707	0.466	0.723	0.565
0.843	0.769	1.000	0.961	0.765	0.801	0.930	0.788	0.844	0.534	0.570	0.713	0.862	0.669	0.827	0.829	0.898	0.450	0.886	0.678
0.824	0.707	0.961	1.000	0.741	0.763	0.926	0.760	0.823	0.509	0.533	0.664	0.828	0.659	0.857	0.828	0.902	0.448	0.872	0.675
0.859	0.719	0.765	0.741	1.000	0.595	0.740	0.618	0.772	0.453	0.481	0.657	0.775	0.700	0.710	0.755	0.686	0.590	0.724	0.558
0.763	0.742	0.801	0.763	0.595	1.000	0.783	0.746	0.773	0.550	0.662	0.667	0.722	0.558	0.671	0.619	0.776	0.406	0.785	0.697

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V22	V23	V24	V25	V26	V27	V28	V29	V30	V31
0	1	1	1	1	1	2	2	2	3	⋯	4	4	5	4	5	5	6	6	7	7
1	0	1	2	2	2	3	3	2	4	⋯	5	5	6	5	6	6	7	7	8	8
1	1	0	1	1	2	2	3	1	3	⋯	5	5	5	4	5	5	6	6	7	7
1	2	1	0	2	1	1	2	1	2	⋯	4	4	4	3	4	4	5	5	6	6
1	2	1	2	0	2	3	3	2	4	⋯	5	5	6	5	6	6	7	7	8	8
1	2	2	1	2	0	1	1	2	2	⋯	3	3	4	3	4	4	5	5	6	6

GOID	TERM	DEFINITION
GO:0006644	phospholipid metabolic process	The chemical reactions and pathways involving phospholipids, any lipid containing phosphoric acid as a mono- or diester.
GO:0007049	cell cycle	The progression of biochemical and morphological phases and events that occur in a cell during successive cell replication or nuclear replication events. Canonically, the cell cycle comprises the replication and segregation of genetic material followed by the division of the cell, but in endocycles or syncytial cells nuclear replication or nuclear division may not be followed by cell division.
GO:0044699	single-organism process	A biological process that involves only one organism.
GO:0044710	single-organism metabolic process	A metabolic process - chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances - which involves a single organism.
GO:0051128	regulation of cellular component organization	Any process that modulates the frequency, rate or extent of a process involved in the formation, arrangement of constituent parts, or disassembly of cell structures, including the plasma membrane and any external encapsulating structures such as the cell wall and cell envelope.
GO:0034641	cellular nitrogen compound metabolic process	The chemical reactions and pathways involving various organic and inorganic nitrogenous compounds, as carried out by individual cells.
GO:0006725	cellular aromatic compound metabolic process	The chemical reactions and pathways involving aromatic compounds, any organic compound characterized by one or more planar rings, each of which contains conjugated double bonds and delocalized pi electrons, as carried out by individual cells.
GO:0044710	single-organism metabolic process	A metabolic process - chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances - which involves a single organism.
GO:0044238	primary metabolic process	The chemical reactions and pathways involving those compounds which are formed as a part of the normal anabolic and catabolic processes. These processes take place in most, if not all, cells of the organism.
GO:0044699	single-organism process	A biological process that involves only one organism.
GO:0005975	carbohydrate metabolic process	The chemical reactions and pathways involving carbohydrates, any of a group of organic compounds based of the general formula Cx(H2O)y. Includes the formation of carbohydrate derivatives by the addition of a carbohydrate residue to another molecule.
GO:0044710	single-organism metabolic process	A metabolic process - chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances - which involves a single organism.
GO:0030029	actin filament-based process	Any cellular process that depends upon or alters the actin cytoskeleton, that part of the cytoskeleton comprising actin filaments and their associated proteins.
GO:0051726	regulation of cell cycle	Any process that modulates the rate or extent of progression through the cell cycle.
GO:0016070	RNA metabolic process	The cellular chemical reactions and pathways involving RNA, ribonucleic acid, one of the two main type of nucleic acid, consisting of a long, unbranched macromolecule formed from ribonucleotides joined in 3',5'-phosphodiester linkage.
GO:0044085	cellular component biogenesis	A process that results in the biosynthesis of constituent macromolecules, assembly, and arrangement of constituent parts of a cellular component. Includes biosynthesis of constituent macromolecules, and those macromolecular modifications that are involved in synthesis or assembly of the cellular component.
GO:0044763	single-organism cellular process	Any process that is carried out at the cellular level, occurring within a single organism.
GO:0048518	positive regulation of biological process	Any process that activates or increases the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.
GO:0016070	RNA metabolic process	The cellular chemical reactions and pathways involving RNA, ribonucleic acid, one of the two main type of nucleic acid, consisting of a long, unbranched macromolecule formed from ribonucleotides joined in 3',5'-phosphodiester linkage.
GO:0051716	cellular response to stimulus	Any process that results in a change in state or activity of a cell (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a stimulus. The process begins with detection of the stimulus by a cell and ends with a change in state or activity or the cell.
GO:0009893	positive regulation of metabolic process	Any process that activates or increases the frequency, rate or extent of the chemical reactions and pathways within a cell or an organism.
GO:0009056	catabolic process	The chemical reactions and pathways resulting in the breakdown of substances, including the breakdown of carbon compounds with the liberation of energy for use by the cell or organism.
GO:0042221	response to chemical	Any process that results in a change in state or activity of a cell or an organism (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a chemical stimulus.
GO:0051179	localization	Any process in which a cell, a substance, or a cellular entity, such as a protein complex or organelle, is transported, tethered to or otherwise maintained in a specific location. In the case of substances, localization may also be achieved via selective degradation.
GO:0061024	membrane organization	A process which results in the assembly, arrangement of constituent parts, or disassembly of a membrane. A membrane is a double layer of lipid molecules that encloses all cells, and, in eukaryotes, many organelles; may be a single or double lipid bilayer; also includes associated proteins.
GO:0050789	regulation of biological process	Any process that modulates the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.
GO:0006629	lipid metabolic process	The chemical reactions and pathways involving lipids, compounds soluble in an organic solvent but not, or sparingly, in an aqueous solvent. Includes fatty acids; neutral fats, other fatty-acid esters, and soaps; long-chain (fatty) alcohols and waxes; sphingoids and other long-chain bases; glycolipids, phospholipids and sphingolipids; and carotenes, polyprenols, sterols, terpenes and other isoprenoids.
GO:0065007	biological regulation	Any process that modulates a measurable attribute of any biological process, quality or function.
GO:0018130	heterocycle biosynthetic process	The chemical reactions and pathways resulting in the formation of heterocyclic compounds, those with a cyclic molecular structure and at least two different atoms in the ring (or rings).
GO:0008152	metabolic process	The chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances. Metabolic processes typically transform small molecules, but also include macromolecular processes such as DNA repair and replication, and protein synthesis and degradation.
GO:0007031	peroxisome organization	A process that is carried out at the cellular level which results in the assembly, arrangement of constituent parts, or disassembly of a peroxisome. A peroxisome is a small, membrane-bounded organelle that uses dioxygen (O2) to oxidize organic molecules.
GO:0065007	biological regulation	Any process that modulates a measurable attribute of any biological process, quality or function.
GO:0051179	localization	Any process in which a cell, a substance, or a cellular entity, such as a protein complex or organelle, is transported, tethered to or otherwise maintained in a specific location. In the case of substances, localization may also be achieved via selective degradation.
GO:0071840	cellular component organization or biogenesis	A process that results in the biosynthesis of constituent macromolecules, assembly, arrangement of constituent parts, or disassembly of a cellular component.
GO:0071840	cellular component organization or biogenesis	A process that results in the biosynthesis of constituent macromolecules, assembly, arrangement of constituent parts, or disassembly of a cellular component.
GO:0008152	metabolic process	The chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances. Metabolic processes typically transform small molecules, but also include macromolecular processes such as DNA repair and replication, and protein synthesis and degradation.
GO:0008152	metabolic process	The chemical reactions and pathways, including anabolism and catabolism, by which living organisms transform chemical substances. Metabolic processes typically transform small molecules, but also include macromolecular processes such as DNA repair and replication, and protein synthesis and degradation.
GO:0050789	regulation of biological process	Any process that modulates the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.
GO:0043603	cellular amide metabolic process	The chemical reactions and pathways involving an amide, any derivative of an oxoacid in which an acidic hydroxy group has been replaced by an amino or substituted amino group, as carried out by individual cells.
GO:0009132	nucleoside diphosphate metabolic process	The chemical reactions and pathways involving a nucleoside diphosphate, a compound consisting of a nucleobase linked to a deoxyribose or ribose sugar esterified with diphosphate on the sugar.
GO:0009058	biosynthetic process	The chemical reactions and pathways resulting in the formation of substances; typically the energy-requiring part of metabolism in which simpler substances are transformed into more complex ones.
GO:0051179	localization	Any process in which a cell, a substance, or a cellular entity, such as a protein complex or organelle, is transported, tethered to or otherwise maintained in a specific location. In the case of substances, localization may also be achieved via selective degradation.
GO:0048519	negative regulation of biological process	Any process that stops, prevents, or reduces the frequency, rate or extent of a biological process. Biological processes are regulated by many means; examples include the control of gene expression, protein modification or interaction with a protein or substrate molecule.
GO:1901360	organic cyclic compound metabolic process	The chemical reactions and pathways involving organic cyclic compound.
GO:0022402	cell cycle process	The cellular process that ensures successive accurate and complete genome replication and chromosome segregation.
GO:0007163	establishment or maintenance of cell polarity	Any cellular process that results in the specification, formation or maintenance of anisotropic intracellular organization or cell growth patterns.
GO:0043170	macromolecule metabolic process	The chemical reactions and pathways involving macromolecules, any molecule of high relative molecular mass, the structure of which essentially comprises the multiple repetition of units derived, actually or conceptually, from molecules of low relative molecular mass.
GO:0009056	catabolic process	The chemical reactions and pathways resulting in the breakdown of substances, including the breakdown of carbon compounds with the liberation of energy for use by the cell or organism.
GO:0043170	macromolecule metabolic process	The chemical reactions and pathways involving macromolecules, any molecule of high relative molecular mass, the structure of which essentially comprises the multiple repetition of units derived, actually or conceptually, from molecules of low relative molecular mass.
GO:0051716	cellular response to stimulus	Any process that results in a change in state or activity of a cell (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a stimulus. The process begins with detection of the stimulus by a cell and ends with a change in state or activity or the cell.
GO:0016043	cellular component organization	A process that results in the assembly, arrangement of constituent parts, or disassembly of a cellular component.
GO:0009058	biosynthetic process	The chemical reactions and pathways resulting in the formation of substances; typically the energy-requiring part of metabolism in which simpler substances are transformed into more complex ones.

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V43	V44	V45	V46	V47	V48	V49	V50	V51	V52
0	1	1	1	1	2	2	1	2	2	⋯	5	3	5	2	4	3	3	3	3	3
1	0	1	1	1	1	2	2	1	2	⋯	5	3	5	2	4	3	3	3	2	2
1	1	0	2	2	2	3	2	2	3	⋯	6	2	6	3	5	4	2	2	3	3
1	1	2	0	2	2	3	2	2	3	⋯	6	4	6	3	5	4	4	4	3	3
1	1	2	2	0	1	1	1	1	1	⋯	4	4	4	1	3	2	4	4	2	2
2	1	2	2	1	0	1	2	2	2	⋯	5	4	5	2	4	3	4	4	1	1

GOID	TERM	DEFINITION
GO:0090114	COPII-coated vesicle budding	The evagination of an endoplasmic reticulum membrane, resulting in formation of a COPII-coated vesicle.
GO:0031146	SCF-dependent proteasomal ubiquitin-dependent protein catabolic process	The chemical reactions and pathways resulting in the breakdown of a protein or peptide by hydrolysis of its peptide bonds, initiated by the covalent attachment of ubiquitin, with ubiquitin-protein ligation catalyzed by an SCF (Skp1/Cul1/F-box protein) complex, and mediated by the proteasome.
GO:0009132	nucleoside diphosphate metabolic process	The chemical reactions and pathways involving a nucleoside diphosphate, a compound consisting of a nucleobase linked to a deoxyribose or ribose sugar esterified with diphosphate on the sugar.
GO:0044038	cell wall macromolecule biosynthetic process	The chemical reactions and pathways resulting in the formation of a macromolecule destined to form part of a cell wall.
GO:0043467	regulation of generation of precursor metabolites and energy	Any process that modulates the frequency, rate or extent of the chemical reactions and pathways resulting in the formation of precursor metabolites, substances from which energy is derived, and the processes involved in the liberation of energy from these substances.
GO:0008033	tRNA processing	The process in which a pre-tRNA molecule is converted to a mature tRNA, ready for addition of an aminoacyl group.
GO:0031113	regulation of microtubule polymerization	Any process that modulates the frequency, rate or extent of microtubule polymerization.
GO:0051123	RNA polymerase II transcriptional preinitiation complex assembly	The aggregation, arrangement and bonding together of proteins on an RNA polymerase II promoter DNA to form the transcriptional preinitiation complex (PIC), the formation of which is a prerequisite for transcription by RNA polymerase.
GO:0006896	Golgi to vacuole transport	The directed movement of substances from the Golgi to the vacuole.
GO:0016973	poly(A)+ mRNA export from nucleus	The directed movement of poly(A)+ mRNA out of the nucleus into the cytoplasm.
GO:0006576	cellular biogenic amine metabolic process	The chemical reactions and pathways occurring at the level of individual cells involving any of a group of naturally occurring, biologically active amines, such as norepinephrine, histamine, and serotonin, many of which act as neurotransmitters.
GO:0002181	cytoplasmic translation	The chemical reactions and pathways resulting in the formation of a protein in the cytoplasm. This is a ribosome-mediated process in which the information in messenger RNA (mRNA) is used to specify the sequence of amino acids in the protein.
GO:0040001	establishment of mitotic spindle localization	The cell cycle process in which the directed movement of the mitotic spindle to a specific location in the cell occurs.
GO:0016558	protein import into peroxisome matrix	The import of proteins into the peroxisomal matrix. A peroxisome targeting signal (PTS) binds to a soluble receptor protein in the cytosol, and the resulting complex then binds to a receptor protein in the peroxisome membrane and is imported. The cargo protein is then released into the peroxisome matrix.
GO:0006999	nuclear pore organization	A process that is carried out at the cellular level which results in the assembly, arrangement of constituent parts, or disassembly of the nuclear pore.
GO:0007096	regulation of exit from mitosis	Any process involved in the progression from anaphase/telophase to G1 that is associated with a conversion from high to low mitotic CDK activity.
GO:1904669	ATP export	The directed movement of ATP out of a cell or organelle.

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20
0	1	1	1	1	1	2	1	2	2	2	2	3	3	2	3	2	3	3	4
1	0	1	1	2	2	1	1	1	1	1	1	2	2	1	2	1	2	2	3
1	1	0	2	2	1	1	2	2	2	2	2	3	3	2	2	2	3	3	4
1	1	2	0	1	2	2	2	2	2	2	2	3	3	2	3	2	3	3	4
1	2	2	1	0	2	3	2	3	3	3	3	4	4	3	4	3	4	4	5
1	2	1	2	2	0	2	2	3	3	3	3	4	4	3	3	3	4	4	5

1.000	0.527	0.544	0.491	0.534	0.491	0.513	0.495	0.448	0.505	0.375	0.632	0.531
0.527	1.000	0.610	0.568	0.520	0.534	0.523	0.507	0.548	0.540	0.455	0.584	0.494
0.544	0.610	1.000	0.625	0.523	0.608	0.651	0.606	0.634	0.602	0.358	0.604	0.391
0.491	0.568	0.625	1.000	0.514	0.558	0.609	0.552	0.623	0.592	0.416	0.521	0.383
0.534	0.520	0.523	0.514	1.000	0.496	0.514	0.522	0.464	0.585	0.409	0.554	0.453
0.491	0.534	0.608	0.558	0.496	1.000	0.557	0.540	0.539	0.549	0.354	0.591	0.453

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V25	V26	V27	V28	V29	V30	V31	V32	V33	V34
0	1	2	2	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	3
1	0	1	1	1	1	1	1	1	2	⋯	2	2	2	3	4	4	2	3	1	2
2	1	0	1	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	1
2	1	1	0	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	1
2	1	2	2	0	1	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	3
2	1	2	2	1	0	1	2	2	3	⋯	2	3	3	3	4	4	3	4	2	3

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V22	V23	V24	V25	V26	V27	V28	V29	V30	V31
0	1	1	1	1	1	2	2	2	3	⋯	4	4	5	4	5	5	6	6	7	7
1	0	1	2	2	2	3	3	2	4	⋯	5	5	6	5	6	6	7	7	8	8
1	1	0	1	1	2	2	3	1	3	⋯	5	5	5	4	5	5	6	6	7	7
1	2	1	0	2	1	1	2	1	2	⋯	4	4	4	3	4	4	5	5	6	6
1	2	1	2	0	2	3	3	2	4	⋯	5	5	6	5	6	6	7	7	8	8
1	2	2	1	2	0	1	1	2	2	⋯	3	3	4	3	4	4	5	5	6	6

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V43	V44	V45	V46	V47	V48	V49	V50	V51	V52
0	1	1	1	1	2	2	1	2	2	⋯	5	3	5	2	4	3	3	3	3	3
1	0	1	1	1	1	2	2	1	2	⋯	5	3	5	2	4	3	3	3	2	2
1	1	0	2	2	2	3	2	2	3	⋯	6	2	6	3	5	4	2	2	3	3
1	1	2	0	2	2	3	2	2	3	⋯	6	4	6	3	5	4	4	4	3	3
1	1	2	2	0	1	1	1	1	1	⋯	4	4	4	1	3	2	4	4	2	2
2	1	2	2	1	0	1	2	2	2	⋯	5	4	5	2	4	3	4	4	1	1

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20
0	1	1	1	1	1	2	1	2	2	2	2	3	3	2	3	2	3	3	4
1	0	1	1	2	2	1	1	1	1	1	1	2	2	1	2	1	2	2	3
1	1	0	2	2	1	1	2	2	2	2	2	3	3	2	2	2	3	3	4
1	1	2	0	1	2	2	2	2	2	2	2	3	3	2	3	2	3	3	4
1	2	2	1	0	2	3	2	3	3	3	3	4	4	3	4	3	4	4	5
1	2	1	2	2	0	2	2	3	3	3	3	4	4	3	3	3	4	4	5

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13
0	1	1	1	2	2	1	1	2	2	1	1	1
1	0	1	2	1	1	2	2	3	1	2	2	2
1	1	0	1	1	2	2	2	3	2	2	2	2
1	2	1	0	2	3	1	2	2	3	2	2	2
2	1	1	2	0	1	3	3	4	2	3	3	3
2	1	2	3	1	0	3	3	4	1	3	3	3

	YML028W	YGL122C	YPL214C	YHL006C	YKL130C	YCR011C	YBL007C	YJR091C	YGL145W	YBR133C	YDR214W	YGR268C	YLR291C	YOR138C	YEL023C	YFR002W
YML028W	1.000	0.218	0.330	0.389	0.193	0.385	0.183	0.150	0.121	0.324	0.547	0.477	0.146	0.157	0.477	0.152
YGL122C	0.218	1.000	0.270	0.318	0.119	0.340	0.085	0.332	0.451	0.351	0.098	0.185	0.295	0.223	0.185	0.326
YPL214C	0.330	0.270	1.000	0.338	0.188	0.171	0.131	0.212	0.053	0.319	0.106	0.069	0.396	0.123	0.069	0.080
YHL006C	0.389	0.318	0.338	1.000	0.208	0.234	0.264	0.263	0.078	0.310	0.235	0.105	0.230	0.249	0.105	0.196
YKL130C	0.193	0.119	0.188	0.208	1.000	0.245	0.176	0.298	0.203	0.152	0.142	0.243	0.118	0.091	0.243	0.156
YCR011C	0.385	0.340	0.171	0.234	0.245	1.000	0.391	0.242	0.512	0.373	0.284	0.196	0.161	0.112	0.196	0.350
YBL007C	0.183	0.085	0.131	0.264	0.176	0.391	1.000	0.149	0.453	0.167	0.158	0.283	0.079	0.062	0.283	0.328
YJR091C	0.150	0.332	0.212	0.263	0.298	0.242	0.149	1.000	0.344	0.210	0.118	0.200	0.115	0.140	0.200	0.221
YGL145W	0.121	0.451	0.053	0.078	0.203	0.512	0.453	0.344	1.000	0.117	0.095	0.158	0.045	0.035	0.158	0.451
YBR133C	0.324	0.351	0.319	0.310	0.152	0.373	0.167	0.210	0.117	1.000	0.192	0.217	0.294	0.330	0.217	0.132
YDR214W	0.547	0.098	0.106	0.235	0.142	0.284	0.158	0.118	0.095	0.192	1.000	0.477	0.099	0.144	0.477	0.145
YGR268C	0.477	0.185	0.069	0.105	0.243	0.196	0.283	0.200	0.158	0.217	0.477	1.000	0.077	0.144	1.000	0.203
YLR291C	0.146	0.295	0.396	0.230	0.118	0.161	0.079	0.115	0.045	0.294	0.099	0.077	1.000	0.268	0.077	0.064
YOR138C	0.157	0.223	0.123	0.249	0.091	0.112	0.062	0.140	0.035	0.330	0.144	0.144	0.268	1.000	0.144	0.070
YEL023C	0.477	0.185	0.069	0.105	0.243	0.196	0.283	0.200	0.158	0.217	0.477	1.000	0.077	0.144	1.000	0.203
YFR002W	0.152	0.326	0.080	0.196	0.156	0.350	0.328	0.221	0.451	0.132	0.145	0.203	0.064	0.070	0.203	1.000

	go_id	Term	Definition
15591	GO:0018343	protein farnesylation	The covalent attachment of a farnesyl group to a protein.
15594	GO:0018344	protein geranylgeranylation	The covalent attachment of a geranylgeranyl group to a protein.
16626	GO:0006874	cellular calcium ion homeostasis	Any process involved in the maintenance of an internal steady state of calcium ions at the level of a cell.
17047	GO:0030010	establishment of cell polarity	The specification and formation of anisotropic intracellular organization or cell growth patterns.
48636	GO:0042127	regulation of cell proliferation	Any process that modulates the frequency, rate or extent of cell proliferation.
79331	GO:0070884	regulation of calcineurin-NFAT signaling cascade	Any process that modulates the frequency, rate or extent of the calcineurin-NFAT signaling cascade.

	p.values	go_id
GO:0006874	8.917154e-03	GO:0006874
GO:0042127	4.467135e-03	GO:0042127
GO:0070884	4.467135e-03	GO:0070884
GO:0018343	1.711546e-05	GO:0018343
GO:0018344	1.700630e-04	GO:0018344
GO:0030010	3.451281e-03	GO:0030010

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V25	V26	V27	V28	V29	V30	V31	V32	V33	V34
0	1	2	2	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	3
1	0	1	1	1	1	1	1	1	2	⋯	2	2	2	3	4	4	2	3	1	2
2	1	0	1	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	1
2	1	1	0	2	2	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	1
2	1	2	2	0	1	2	2	2	3	⋯	3	3	3	4	5	5	3	4	2	3
2	1	2	2	1	0	1	2	2	3	⋯	2	3	3	3	4	4	3	4	2	3

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V22	V23	V24	V25	V26	V27	V28	V29	V30	V31
0	1	1	1	1	1	2	2	2	3	⋯	4	4	5	4	5	5	6	6	7	7
1	0	1	2	2	2	3	3	2	4	⋯	5	5	6	5	6	6	7	7	8	8
1	1	0	1	1	2	2	3	1	3	⋯	5	5	5	4	5	5	6	6	7	7
1	2	1	0	2	1	1	2	1	2	⋯	4	4	4	3	4	4	5	5	6	6
1	2	1	2	0	2	3	3	2	4	⋯	5	5	6	5	6	6	7	7	8	8
1	2	2	1	2	0	1	1	2	2	⋯	3	3	4	3	4	4	5	5	6	6

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	⋯	V43	V44	V45	V46	V47	V48	V49	V50	V51	V52
0	1	1	1	1	2	2	1	2	2	⋯	5	3	5	2	4	3	3	3	3	3
1	0	1	1	1	1	2	2	1	2	⋯	5	3	5	2	4	3	3	3	2	2
1	1	0	2	2	2	3	2	2	3	⋯	6	2	6	3	5	4	2	2	3	3
1	1	2	0	2	2	3	2	2	3	⋯	6	4	6	3	5	4	4	4	3	3
1	1	2	2	0	1	1	1	1	1	⋯	4	4	4	1	3	2	4	4	2	2
2	1	2	2	1	0	1	2	2	2	⋯	5	4	5	2	4	3	4	4	1	1

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20
0	1	1	1	1	1	2	1	2	2	2	2	3	3	2	3	2	3	3	4
1	0	1	1	2	2	1	1	1	1	1	1	2	2	1	2	1	2	2	3
1	1	0	2	2	1	1	2	2	2	2	2	3	3	2	2	2	3	3	4
1	1	2	0	1	2	2	2	2	2	2	2	3	3	2	3	2	3	3	4
1	2	2	1	0	2	3	2	3	3	3	3	4	4	3	4	3	4	4	5
1	2	1	2	2	0	2	2	3	3	3	3	4	4	3	3	3	4	4	5

V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13
0	1	1	1	2	2	1	1	2	2	1	1	1
1	0	1	2	1	1	2	2	3	1	2	2	2
1	1	0	1	1	2	2	2	3	2	2	2	2
1	2	1	0	2	3	1	2	2	3	2	2	2
2	1	1	2	0	1	3	3	4	2	3	3	3
2	1	2	3	1	0	3	3	4	1	3	3	3