In [1]:
import goenrich
import numpy as np
import pandas as pd
import pickle
import networkx as nx
from spearmint_ghsom import main_no_labels as ghsom_main
In [2]:
def save_obj(obj, name):
with open(name + '.pkl', 'wb') as f:
pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
with open(name + '.pkl', 'rb') as f:
return pickle.load(f)
In [29]:
import sys
from time import sleep
from __future__ import division
len = 100
for i in range(100):
line = "[" + "=" * int(i * len / 100) + ">" + "-" * int((100 - i - 1) * len / 100) + "]"
sys.stdout.write("\r{}".format(line))
sys.stdout.flush()
# print line
sleep(0.1)
sys.stdout.write(" DONE")
[===================================================================================================>] DONE
In [4]:
import os
os.chdir("/home/david/Documents/ghsom")
G, map = load_obj('yeast_communities')
print 'num communities: {}'.format(len(map))
num communities: 5
In [5]:
import os
os.chdir("/home/david/Documents/ghsom")
dir_name = "uetz_communities"
if not os.path.isdir(dir_name):
os.mkdir(dir_name)
print 'made directory {}'.format(dir_name)
os.chdir(dir_name)
shortest_path = nx.floyd_warshall_numpy(map).astype(np.int)
np.savetxt("shortest_path.csv", shortest_path, fmt='%i', delimiter=",")
print 'written shortest path matrix'
c = 0
for n, d in map.nodes(data=True):
ls = d['ls']
with open('community_{}.txt'.format(c),'w') as f:
for l in ls:
f.write('{}\n'.format(G.node[l]['label']))
print 'written community_{}.txt'.format(c)
c += 1
made directory uetz_communities
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt
In [27]:
nx.adjacency_matrix(map).toarray().astype(np.int)
Out[27]:
array([[0, 0, 1, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[1, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 1, 0],
[0, 0, 0, ..., 1, 0, 1],
[0, 0, 0, ..., 0, 1, 0]])
In [28]:
n1, d1 = map.nodes(data=True)[1]
for n in map.neighbors(n1):
print n, len(map.node[n]['ls'])
3 72
4 86
5 37
7 45
8 45
13 187
In [29]:
n1, d1 = map.nodes(data=True)[1]
n1, d2 = map.nodes(data=True)[8]
H1 = G.subgraph(d1['ls'])
H2 = G.subgraph(d2['ls'])
l1 = [v for k,v in nx.get_node_attributes(H1, 'label').items()]
l2 = [v for k,v in nx.get_node_attributes(H2, 'label').items()]
print l2
[u'YJL155C', u'YNR046W', u'YBR190W', u'YDL238C', u'YBL051C', u'YLR257W', u'YJR047C', u'YIL151C', u'YIR015W', u'YLR322W', u'YJR046W', u'YBL101WA', u'YDR488C', u'YLR121C', u'YLR465C', u'YLR321C', u'YIR037W', u'YHR129C', u'YLR345W', u'YJL162C', u'YLR216C', u'YLR389C', u'YBR221C', u'YDR106W', u'YML035C', u'YEL034W', u'YJL070C', u'YMR168C', u'YBR111C', u'YKR096W', u'YIL132C', u'YLR376C', u'YLR117C', u'YDR504C', u'YHR068W', u'YDL111C', u'YBR175W', u'YBR284W', u'YBR244W', u'YAR003W', u'YDR140W', u'YJR008W', u'YDR424C', u'YGR158C', u'YPL070W']
In [30]:
O = goenrich.obo.ontology('db/go-basic.obo')
annot = goenrich.read.sgd('db/gene_association.sgd.gz')
gene2go = goenrich.read.gene2go('db/gene2go.gz')
values = {k: set(v) for k,v in annot.groupby('go_id')['db_object_symbol']}
# propagate the background through the ontology
background_attribute = 'gene2go'
goenrich.enrich.propagate(O, values, background_attribute)
In [6]:
annot.loc[annot['db_object_synonym'].str.contains('YOL020W')==True]
Out[6]:
db
db_object_id
db_object_symbol
qualifier
go_id
db_reference
evidence_code
with_from
aspect
db_object_name
db_object_synonym
db_object_type
taxon
date
assigned_by
annotation_extension
gene_product_form_id
89229
SGD
S000005380
TAT2
NaN
GO:0016021
SGD_REF:S000124036
IEA
InterPro:IPR004762
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89230
SGD
S000005380
TAT2
NaN
GO:0016021
SGD_REF:S000124036
IEA
InterPro:IPR004840
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89231
SGD
S000005380
TAT2
NaN
GO:0016020
SGD_REF:S000124036
IEA
InterPro:IPR002293
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89232
SGD
S000005380
TAT2
NaN
GO:0016020
SGD_REF:S000124036
IEA
InterPro:IPR004841
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89233
SGD
S000005380
TAT2
NaN
GO:0015171
SGD_REF:S000124036
IEA
InterPro:IPR002293
F
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89234
SGD
S000005380
TAT2
NaN
GO:0006810
SGD_REF:S000124036
IEA
InterPro:IPR004841
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89235
SGD
S000005380
TAT2
NaN
GO:0003824
SGD_REF:S000124036
IEA
InterPro:IPR013792
F
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89236
SGD
S000005380
TAT2
NaN
GO:0005886
SGD_REF:S000073805|PMID:12810702
IDA
NaN
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20030908
SGD
NaN
NaN
89237
SGD
S000005380
TAT2
NaN
GO:0016021
SGD_REF:S000114049|PMID:12192589
ISM
NaN
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20110531
SGD
NaN
NaN
89238
SGD
S000005380
TAT2
NaN
GO:0055085
SGD_REF:S000124036
IEA
InterPro:IPR004840
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89239
SGD
S000005380
TAT2
NaN
GO:0055085
SGD_REF:S000124036
IEA
InterPro:IPR004841
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89240
SGD
S000005380
TAT2
NaN
GO:0006865
SGD_REF:S000124036
IEA
InterPro:IPR004762
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89241
SGD
S000005380
TAT2
NaN
GO:0006865
SGD_REF:S000124036
IEA
InterPro:IPR004840
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89242
SGD
S000005380
TAT2
NaN
GO:0003333
SGD_REF:S000124036
IEA
InterPro:IPR002293
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
InterPro
NaN
NaN
89243
SGD
S000005380
TAT2
NaN
GO:0015173
SGD_REF:S000043244|PMID:10654085
IDA
NaN
F
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20020806
SGD
NaN
NaN
89244
SGD
S000005380
TAT2
NaN
GO:0016020
SGD_REF:S000148671
IEA
UniProtKB-SubCell:SL-0162
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
UniProt
NaN
NaN
89245
SGD
S000005380
TAT2
NaN
GO:0016020
SGD_REF:S000148669
IEA
UniProtKB-KW:KW-0472
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
UniProt
NaN
NaN
89246
SGD
S000005380
TAT2
NaN
GO:0005783
SGD_REF:S000183046|PMID:26928762
IDA
NaN
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20160603
SGD
NaN
NaN
89247
SGD
S000005380
TAT2
NaN
GO:0015827
SGD_REF:S000044967|PMID:7523855
IMP
NaN
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20071128
SGD
NaN
NaN
89248
SGD
S000005380
TAT2
NaN
GO:0005300
SGD_REF:S000044967|PMID:7523855
IMP
NaN
F
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20071128
SGD
NaN
NaN
89249
SGD
S000005380
TAT2
NaN
GO:0006810
SGD_REF:S000148669
IEA
UniProtKB-KW:KW-0813
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
UniProt
NaN
NaN
89250
SGD
S000005380
TAT2
NaN
GO:0005887
SGD_REF:S000185201
IBA
PANTHER:PTN000208210
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20150304
GO_Central
NaN
NaN
89251
SGD
S000005380
TAT2
NaN
GO:0015297
SGD_REF:S000185201
IBA
PANTHER:PTN000208210
F
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20150304
GO_Central
NaN
NaN
89252
SGD
S000005380
TAT2
NaN
GO:0005773
SGD_REF:S000185201
IBA
PANTHER:PTN000926881
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20150304
GO_Central
NaN
NaN
89253
SGD
S000005380
TAT2
NaN
GO:0006865
SGD_REF:S000148669
IEA
UniProtKB-KW:KW-0029
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
UniProt
NaN
NaN
89254
SGD
S000005380
TAT2
NaN
GO:0016021
SGD_REF:S000148669
IEA
UniProtKB-KW:KW-0812
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20170107
UniProt
NaN
NaN
89255
SGD
S000005380
TAT2
NaN
GO:0005886
SGD_REF:S000043244|PMID:10654085
ISS
NaN
C
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20020806
SGD
NaN
NaN
89256
SGD
S000005380
TAT2
NaN
GO:0015801
SGD_REF:S000043244|PMID:10654085
IDA
NaN
P
High affinity tryptophan and tyrosine permease
YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci...
gene
taxon:559292
20020806
SGD
NaN
NaN
In [31]:
q1 = np.array(l1)
q2 = np.array(l2)
In [32]:
for q in q1:
print q
print
for q in q2:
print q
YJR058C
YHR105W
YBR135W
YBR160W
YNL037C
YGL156W
YJL138C
YBR254C
YLR102C
YOR379C
YIR018W
YOR136W
YPL128C
YKR059W
YHR112C
YPL280W
YML042W
YDR421W
YOR155C
YHR113W
YNL044W
YLR021W
YJR021C
YLR347C
YFL010C
YMR096W
YDL164C
YDR027C
YML108W
YGL015C
YLR442C
YNL098C
YFL059W
YDL066W
YFL017WA
YCL033C
YIL053W
YBR176W
YMR322C
YER062C
YMR276W
YPL201C
YKL090W
YDR256C
YPL091W
YLR245C
YMR218C
YDR122W
YGL213C
YMR308C
YOR097C
YGL071W
YGR024C
YHR156C
YNL334C
YNL316C
YPL088W
YGL037C
YKL103C
YBL037W
YGL175C
YIL119C
YLR315W
YOL145C
YHL009C
YNL222W
YDR189W
YGR108W
YKR068C
YFL039C
YGR144W
YIL051C
YPL125W
YGL187C
YDR244W
YJR052W
YBR107C
YPR054W
YPR049C
YDR502C
YBL057C
YPR062W
YHR039C
YOR391C
YDR480W
YMR033W
YJR112W
YBR252W
YGR058W
YPR193C
YOR115C
YPL110C
YGR267C
YOL082W
YNL138W
YDR472W
YGR179C
YPL251W
YFL029C
YJL199C
YML077W
YMR092C
YIL082W
YPR160W
YLR377C
YNL107W
YOR380W
YFL060C
YJL104W
YJL103C
YHL018W
YOR095C
YOL054W
YDR518W
YLR319C
YNL153C
YJR010W
YLR026C
YMR095C
YJL155C
YNR046W
YBR190W
YDL238C
YBL051C
YLR257W
YJR047C
YIL151C
YIR015W
YLR322W
YJR046W
YBL101WA
YDR488C
YLR121C
YLR465C
YLR321C
YIR037W
YHR129C
YLR345W
YJL162C
YLR216C
YLR389C
YBR221C
YDR106W
YML035C
YEL034W
YJL070C
YMR168C
YBR111C
YKR096W
YIL132C
YLR376C
YLR117C
YDR504C
YHR068W
YDL111C
YBR175W
YBR284W
YBR244W
YAR003W
YDR140W
YJR008W
YDR424C
YGR158C
YPL070W
In [33]:
# for additional export to graphviz just specify the gvfile argument
# the show argument keeps the graph reasonably small
df1 = goenrich.enrich.analyze(O, q1, background_attribute)
df1 = df1.sort_values('q').dropna()
df1 = df1.loc[df1['rejected'] == 0.0]
df1 = df1.loc[df1['x'] > 0]
df2 = goenrich.enrich.analyze(O, q2, background_attribute)
df2 = df2.sort_values('q').dropna()
df2 = df2.loc[df2['rejected'] == 0.0]
df2 = df2.loc[df2['x'] > 0]
In [34]:
df1
Out[34]:
M
N
n
name
namespace
p
q
rejected
term
x
29044
6448
119
5
cystathionine gamma-synthase activity
molecular_function
0.003257
0.429369
0.0
GO:0003962
1
38848
6448
119
15
pteridine-containing compound biosynthetic pro...
biological_process
0.030318
0.429369
0.0
GO:0042559
1
38847
6448
119
19
pteridine-containing compound metabolic process
biological_process
0.047076
0.429369
0.0
GO:0042558
1
34339
6448
119
20
cysteine metabolic process
biological_process
0.051689
0.429369
0.0
GO:0006534
1
33933
6448
119
12
homocysteine metabolic process
biological_process
0.019754
0.429369
0.0
GO:0050667
1
18286
6448
119
10
carbon-sulfur lyase activity
molecular_function
0.013797
0.429369
0.0
GO:0016846
1
3702
6448
119
5
cystathionine gamma-lyase activity
molecular_function
0.003257
0.429369
0.0
GO:0004123
1
4729
6448
119
7
aryl-alcohol dehydrogenase (NAD+) activity
molecular_function
0.006676
0.429369
0.0
GO:0018456
1
23198
6448
119
36
hydro-lyase activity
molecular_function
0.142097
0.433662
0.0
GO:0016836
1
6869
6448
119
44
pyridoxal phosphate binding
molecular_function
0.194635
0.455170
0.0
GO:0030170
1
3628
6448
119
44
sulfur amino acid biosynthetic process
biological_process
0.194635
0.455170
0.0
GO:0000097
1
41812
6448
119
44
vitamin B6 binding
molecular_function
0.194635
0.455170
0.0
GO:0070279
1
13438
6448
119
45
serine family amino acid metabolic process
biological_process
0.201378
0.456891
0.0
GO:0009069
1
8738
6448
119
47
transferase activity, transferring alkyl or ar...
molecular_function
0.214944
0.460424
0.0
GO:0016765
1
24572
6448
119
47
methionine metabolic process
biological_process
0.214944
0.460424
0.0
GO:0006555
1
23197
6448
119
47
carbon-oxygen lyase activity
molecular_function
0.214944
0.460424
0.0
GO:0016835
1
3629
6448
119
50
sulfur amino acid metabolic process
biological_process
0.235441
0.482982
0.0
GO:0000096
1
21384
6448
119
54
cellular aldehyde metabolic process
biological_process
0.262927
0.504500
0.0
GO:0006081
1
37288
6448
119
59
vitamin binding
molecular_function
0.297304
0.526296
0.0
GO:0019842
1
6725
6448
119
115
lyase activity
molecular_function
0.356954
0.578014
0.0
GO:0016829
2
7476
6448
119
73
prospore membrane
cellular_component
0.391610
0.604663
0.0
GO:0005628
1
9027
6448
119
73
ascospore-type prospore
cellular_component
0.391610
0.604663
0.0
GO:0042764
1
9032
6448
119
73
intracellular immature spore
cellular_component
0.391610
0.604663
0.0
GO:0042763
1
6021
6448
119
80
oxidoreductase activity, acting on the CH-OH g...
molecular_function
0.436723
0.644543
0.0
GO:0016616
1
37100
6448
119
84
aspartate family amino acid metabolic process
biological_process
0.461694
0.668550
0.0
GO:0009066
1
6019
6448
119
86
oxidoreductase activity, acting on CH-OH group...
molecular_function
0.473938
0.679948
0.0
GO:0016614
1
5117
6448
119
87
sulfur compound biosynthetic process
biological_process
0.479998
0.682674
0.0
GO:0044272
1
11167
6448
119
99
coenzyme biosynthetic process
biological_process
0.549294
0.746683
0.0
GO:0009108
1
14303
6448
119
130
alpha-amino acid biosynthetic process
biological_process
0.697301
0.864811
0.0
GO:1901607
1
22978
6448
119
133
cofactor biosynthetic process
biological_process
0.709276
0.873489
0.0
GO:0051188
1
18930
6448
119
138
cellular amino acid biosynthetic process
biological_process
0.728362
0.890925
0.0
GO:0008652
1
20221
6448
119
147
sulfur compound metabolic process
biological_process
0.760068
0.915304
0.0
GO:0006790
1
11206
6448
119
163
cofactor binding
molecular_function
0.808624
0.949017
0.0
GO:0048037
1
7116
6448
119
179
coenzyme metabolic process
biological_process
0.848305
0.976860
0.0
GO:0006732
1
10436
6448
119
452
oxidation-reduction process
biological_process
0.998362
0.999936
0.0
GO:0055114
1
11620
6448
119
317
small molecule biosynthetic process
biological_process
0.982954
0.999936
0.0
GO:0044283
1
11560
6448
119
223
cofactor metabolic process
biological_process
0.922048
0.999936
0.0
GO:0051186
1
13680
6448
119
435
carboxylic acid metabolic process
biological_process
0.997782
0.999936
0.0
GO:0019752
1
14301
6448
119
206
alpha-amino acid metabolic process
biological_process
0.898756
0.999936
0.0
GO:1901605
1
21385
6448
119
453
organic acid metabolic process
biological_process
0.998391
0.999936
0.0
GO:0006082
1
33664
6448
119
452
oxoacid metabolic process
biological_process
0.998362
0.999936
0.0
GO:0043436
1
28721
6448
119
353
oxidoreductase activity
molecular_function
0.990728
0.999936
0.0
GO:0016491
1
1629
6448
119
207
organic acid biosynthetic process
biological_process
0.900287
0.999936
0.0
GO:0016053
1
8419
6448
119
207
carboxylic acid biosynthetic process
biological_process
0.900287
0.999936
0.0
GO:0046394
1
40923
6448
119
267
cellular amino acid metabolic process
biological_process
0.961190
0.999936
0.0
GO:0006520
1
In [16]:
pd.merge(df1, df2, on=['M', 'N', 'n', 'name', 'namespace', 'term'], how='inner')
Out[16]:
M
N
n
name
namespace
p_x
q_x
rejected_x
term
x_x
p_y
q_y
rejected_y
x_y
0
6401
56
56
positive regulation of response to stimulus
biological_process
0.085798
0.210427
0.0
GO:0048584
1
0.085798
0.211483
0.0
1
1
6401
56
58
regulation of growth
biological_process
0.091096
0.211036
0.0
GO:0040008
1
0.091096
0.212521
0.0
1
2
6401
56
129
condensed chromosome
cellular_component
0.102753
0.221501
0.0
GO:0000793
2
0.312062
0.413093
0.0
1
3
6401
56
69
spindle
cellular_component
0.121795
0.235872
0.0
GO:0005819
1
0.121795
0.236025
0.0
1
4
6401
56
75
transcription cofactor activity
molecular_function
0.139479
0.249051
0.0
GO:0003712
1
0.139479
0.249353
0.0
1
5
6401
56
151
developmental process involved in reproduction
biological_process
0.145124
0.255507
0.0
GO:0003006
2
0.382425
0.476899
0.0
1
6
6401
56
255
cellular bud
cellular_component
0.182871
0.287851
0.0
GO:0005933
3
0.660165
0.720536
0.0
1
7
6401
56
346
single organism reproductive process
biological_process
0.183832
0.288572
0.0
GO:0044702
4
0.814314
0.851276
0.0
1
8
6401
56
269
meiotic cell cycle process
biological_process
0.208022
0.310278
0.0
GO:1903046
3
0.689067
0.746278
0.0
1
9
6401
56
488
reproduction
biological_process
0.252666
0.353181
0.0
GO:0000003
5
0.811360
0.848790
0.0
2
10
6401
56
111
condensed nuclear chromosome
cellular_component
0.253429
0.353913
0.0
GO:0000794
1
0.253429
0.356445
0.0
1
11
6401
56
206
chromosomal region
cellular_component
0.268796
0.368563
0.0
GO:0098687
2
0.542748
0.619203
0.0
1
12
6401
56
310
meiotic cell cycle
biological_process
0.286589
0.383332
0.0
GO:0051321
3
0.762466
0.808252
0.0
1
13
6401
56
127
chromatin silencing
biological_process
0.305569
0.403773
0.0
GO:0006342
1
0.305569
0.408164
0.0
1
14
6401
56
128
regulation of signal transduction
biological_process
0.308817
0.405976
0.0
GO:0009966
1
0.308817
0.409806
0.0
1
15
6401
56
133
regulation of signaling
biological_process
0.325012
0.422086
0.0
GO:0023051
1
0.325012
0.425082
0.0
1
16
6401
56
134
gene silencing
biological_process
0.328241
0.424969
0.0
GO:0016458
1
0.328241
0.427598
0.0
1
17
6401
56
135
regulation of cell communication
biological_process
0.331467
0.428769
0.0
GO:0010646
1
0.331467
0.431228
0.0
1
18
6401
56
135
microtubule cytoskeleton
cellular_component
0.331467
0.428769
0.0
GO:0015630
1
0.331467
0.431228
0.0
1
19
6401
56
137
transcription factor activity, transcription f...
molecular_function
0.337905
0.435115
0.0
GO:0000989
1
0.117517
0.231733
0.0
2
20
6401
56
235
cytoskeletal part
cellular_component
0.338735
0.435115
0.0
GO:0044430
2
0.615315
0.685252
0.0
1
21
6401
56
145
negative regulation of cellular component orga...
biological_process
0.363476
0.457722
0.0
GO:0051129
1
0.133034
0.245950
0.0
2
22
6401
56
248
cytoskeleton
cellular_component
0.370175
0.464175
0.0
GO:0005856
2
0.644949
0.707875
0.0
1
23
6401
56
362
mitotic cell cycle process
biological_process
0.391279
0.482430
0.0
GO:1903047
3
0.834003
0.868471
0.0
1
24
6401
56
155
transcription factor activity, protein binding
molecular_function
0.394933
0.485400
0.0
GO:0000988
1
0.153383
0.261302
0.0
2
25
6401
56
472
reproductive process
biological_process
0.397358
0.487682
0.0
GO:0022414
4
0.792598
0.833302
0.0
2
26
6401
56
157
nuclear envelope
cellular_component
0.401145
0.490696
0.0
GO:0005635
1
0.401145
0.492740
0.0
1
27
6401
56
373
mitotic cell cycle
biological_process
0.413449
0.503240
0.0
GO:0000278
3
0.846450
0.878639
0.0
1
28
6401
56
176
regulation of response to stimulus
biological_process
0.458630
0.542553
0.0
GO:0048583
1
0.458630
0.546721
0.0
1
29
6401
56
295
cell division
biological_process
0.480811
0.562919
0.0
GO:0051301
2
0.737489
0.787153
0.0
1
30
6401
56
189
mRNA binding
molecular_function
0.496155
0.576873
0.0
GO:0003729
1
0.496155
0.579605
0.0
1
31
6401
56
302
developmental process
biological_process
0.496607
0.576873
0.0
GO:0032502
2
0.749405
0.795838
0.0
1
32
6401
56
192
intracellular signal transduction
biological_process
0.504586
0.583341
0.0
GO:0035556
1
0.504586
0.585869
0.0
1
33
6401
56
309
nuclear division
biological_process
0.512174
0.589110
0.0
GO:0000280
2
0.760865
0.807368
0.0
1
34
6401
56
198
mitotic nuclear division
biological_process
0.521183
0.598073
0.0
GO:0007067
1
0.521183
0.601115
0.0
1
35
6401
56
317
regulation of organelle organization
biological_process
0.529668
0.605922
0.0
GO:0033043
2
0.529668
0.608519
0.0
2
36
6401
56
435
regulation of cellular component organization
biological_process
0.534164
0.608938
0.0
GO:0051128
3
0.743467
0.791528
0.0
2
37
6401
56
320
organelle fission
biological_process
0.536142
0.610103
0.0
GO:0048285
2
0.777979
0.822626
0.0
1
38
6401
56
351
negative regulation of biosynthetic process
biological_process
0.600094
0.667576
0.0
GO:0009890
2
0.191397
0.295722
0.0
4
39
6401
56
354
negative regulation of nitrogen compound metab...
biological_process
0.605982
0.673110
0.0
GO:0051172
2
0.195991
0.300049
0.0
4
40
6401
56
368
nucleolus
cellular_component
0.632725
0.694495
0.0
GO:0005730
2
0.840899
0.874695
0.0
1
41
6401
56
291
signal transduction
biological_process
0.730471
0.776850
0.0
GO:0007165
1
0.730471
0.780793
0.0
1
42
6401
56
291
single organism signaling
biological_process
0.730471
0.776850
0.0
GO:0044700
1
0.730471
0.780793
0.0
1
43
6401
56
291
signaling
biological_process
0.730471
0.776850
0.0
GO:0023052
1
0.730471
0.780793
0.0
1
44
6401
56
433
negative regulation of cellular metabolic process
biological_process
0.740574
0.785046
0.0
GO:0031324
2
0.328057
0.427547
0.0
4
45
6401
56
436
chromosomal part
cellular_component
0.744904
0.788785
0.0
GO:0044427
2
0.333350
0.431904
0.0
4
46
6401
56
301
nuclear chromosome part
cellular_component
0.747731
0.790926
0.0
GO:0044454
1
0.747731
0.794633
0.0
1
47
6401
56
326
chromatin organization
biological_process
0.786866
0.828753
0.0
GO:0006325
1
0.786866
0.830236
0.0
1
48
6401
56
331
nuclear chromosome
cellular_component
0.794037
0.833328
0.0
GO:0000228
1
0.559465
0.633372
0.0
2
49
6401
56
476
chromosome
cellular_component
0.797424
0.835100
0.0
GO:0005694
2
0.234963
0.338549
0.0
5
50
6401
56
487
negative regulation of macromolecule metabolic...
biological_process
0.810230
0.845507
0.0
GO:0010605
2
0.424038
0.514430
0.0
4
51
6401
56
388
endoplasmic reticulum membrane
cellular_component
0.862087
0.892357
0.0
GO:0005789
1
0.862087
0.891105
0.0
1
52
6401
56
398
cellular macromolecule catabolic process
biological_process
0.871704
0.899469
0.0
GO:0044265
1
0.871704
0.897584
0.0
1
53
6401
56
401
nuclear outer membrane-endoplasmic reticulum m...
cellular_component
0.874470
0.901692
0.0
GO:0042175
1
0.874470
0.899803
0.0
1
54
6401
56
406
cell communication
biological_process
0.878960
0.904905
0.0
GO:0007154
1
0.878960
0.902071
0.0
1
55
6401
56
411
positive regulation of biosynthetic process
biological_process
0.883304
0.907943
0.0
GO:0009891
1
0.707089
0.760489
0.0
2
56
6401
56
412
endoplasmic reticulum part
cellular_component
0.884156
0.908502
0.0
GO:0044432
1
0.884156
0.905976
0.0
1
57
6401
56
422
positive regulation of nitrogen compound metab...
biological_process
0.892370
0.915666
0.0
GO:0051173
1
0.509691
0.590169
0.0
3
58
6401
56
433
macromolecule catabolic process
biological_process
0.900792
0.922701
0.0
GO:0009057
1
0.900792
0.920463
0.0
1
59
6401
56
487
positive regulation of macromolecule metabolic...
biological_process
0.934048
0.948200
0.0
GO:0010604
1
0.625802
0.693938
0.0
3
In [71]:
df
Out[71]:
M
N
n
name
namespace
p
q
rejected
term
x
26805
6401
85
4
monopolin complex
cellular_component
0.000009
0.026402
1.0
GO:0033551
2
9772
6401
85
7
nucleolus organization
biological_process
0.000076
0.112211
0.0
GO:0007000
2
23972
6401
85
9
TORC2 complex
cellular_component
0.000179
0.176111
0.0
GO:0031932
2
8593
6401
85
6
transcription factor activity, RNA polymerase ...
molecular_function
0.077105
0.301874
0.0
GO:0001004
0
26800
6401
85
5
histone deacetylase binding
molecular_function
0.064675
0.301874
0.0
GO:0042826
0
11208
6401
85
5
positive regulation of intracellular transport
biological_process
0.064675
0.301874
0.0
GO:0032388
0
11213
6401
85
28
regulation of intracellular transport
biological_process
0.052713
0.301874
0.0
GO:0032386
1
26768
6401
85
8
response to oxygen levels
biological_process
0.101478
0.301874
0.0
GO:0070482
0
11270
6401
85
8
protein phosphorylated amino acid binding
molecular_function
0.101478
0.301874
0.0
GO:0045309
0
26702
6401
85
15
meiotic sister chromatid segregation
biological_process
0.016358
0.301874
0.0
GO:0045144
1
26701
6401
85
35
homologous chromosome segregation
biological_process
0.010890
0.301874
0.0
GO:0045143
2
26699
6401
85
6
meiotic telomere clustering
biological_process
0.077105
0.301874
0.0
GO:0045141
0
11317
6401
85
6
transcription factor activity, core RNA polyme...
molecular_function
0.077105
0.301874
0.0
GO:0001139
0
26682
6401
85
6
chromosome localization to nuclear envelope in...
biological_process
0.077105
0.301874
0.0
GO:0090220
0
26607
6401
85
6
tRNA methyltransferase complex
cellular_component
0.077105
0.301874
0.0
GO:0043527
0
26592
6401
85
5
fatty acid ligase activity
molecular_function
0.064675
0.301874
0.0
GO:0015645
0
11376
6401
85
72
condensed chromosome, centromeric region
cellular_component
0.070128
0.301874
0.0
GO:0000779
2
11379
6401
85
4
mating pheromone activity
molecular_function
0.052080
0.301874
0.0
GO:0000772
0
11381
6401
85
7
adenyl-nucleotide exchange factor activity
molecular_function
0.089372
0.301874
0.0
GO:0000774
0
26566
6401
85
28
calcium ion binding
molecular_function
0.052713
0.301874
0.0
GO:0005509
1
11383
6401
85
73
kinetochore
cellular_component
0.072459
0.301874
0.0
GO:0000776
2
11384
6401
85
66
condensed chromosome kinetochore
cellular_component
0.056892
0.301874
0.0
GO:0000777
2
26542
6401
85
4
regulation of MAPK cascade involved in cell wa...
biological_process
0.052080
0.301874
0.0
GO:1903137
0
11463
6401
85
6
glycolytic fermentation to ethanol
biological_process
0.077105
0.301874
0.0
GO:0019655
0
11497
6401
85
5
phosphatidylinositol kinase activity
molecular_function
0.064675
0.301874
0.0
GO:0052742
0
26477
6401
85
12
TOR complex
cellular_component
0.000456
0.301874
0.0
GO:0038201
2
26475
6401
85
5
TORC2 signaling
biological_process
0.001698
0.301874
0.0
GO:0038203
1
26447
6401
85
4
microtubule depolymerization
biological_process
0.052080
0.301874
0.0
GO:0007019
0
26435
6401
85
8
Arp2/3 protein complex
cellular_component
0.101478
0.301874
0.0
GO:0005885
0
26432
6401
85
7
nuclear microtubule
cellular_component
0.089372
0.301874
0.0
GO:0005880
0
...
...
...
...
...
...
...
...
...
...
...
23751
6401
85
383
cellular response to chemical stimulus
biological_process
0.994909
0.998977
0.0
GO:0070887
0
24269
6401
85
405
regulation of molecular function
biological_process
0.996279
0.998977
0.0
GO:0065009
0
22325
6401
85
348
rRNA processing
biological_process
0.991636
0.998977
0.0
GO:0006364
0
32893
6401
85
317
protein targeting
biological_process
0.987050
0.998977
0.0
GO:0006605
0
17373
6401
85
327
homeostatic process
biological_process
0.988751
0.998977
0.0
GO:0042592
0
5819
6401
85
368
carbohydrate derivative metabolic process
biological_process
0.993700
0.998977
0.0
GO:1901135
0
5693
6401
85
417
substrate-specific transporter activity
molecular_function
0.996865
0.998977
0.0
GO:0022892
0
5690
6401
85
343
substrate-specific transmembrane transporter a...
molecular_function
0.991024
0.998977
0.0
GO:0022891
0
18759
6401
85
365
hydrolase activity, acting on ester bonds
molecular_function
0.993426
0.998977
0.0
GO:0016788
0
28959
6401
85
396
mitochondrial membrane
cellular_component
0.995769
0.998977
0.0
GO:0031966
0
35767
6401
85
467
transmembrane transport
biological_process
0.998472
0.998977
0.0
GO:0055085
0
17971
6401
85
472
ribosome biogenesis
biological_process
0.998578
0.998977
0.0
GO:0042254
0
41175
6401
85
327
translational elongation
biological_process
0.988751
0.998977
0.0
GO:0006414
0
13680
6401
85
435
carboxylic acid metabolic process
biological_process
0.997578
0.998977
0.0
GO:0019752
0
13067
6401
85
494
intracellular protein transport
biological_process
0.991507
0.998977
0.0
GO:0006886
1
28721
6401
85
353
oxidoreductase activity
molecular_function
0.992208
0.998977
0.0
GO:0016491
0
7504
6401
85
395
structural molecule activity
molecular_function
0.995708
0.998977
0.0
GO:0005198
0
16863
6401
85
321
establishment of protein localization to organ...
biological_process
0.987759
0.998977
0.0
GO:0072594
0
33664
6401
85
452
oxoacid metabolic process
biological_process
0.998103
0.998977
0.0
GO:0043436
0
24838
6401
85
402
transferase complex
cellular_component
0.996116
0.998977
0.0
GO:1990234
0
1634
6401
85
407
organophosphate metabolic process
biological_process
0.996384
0.998977
0.0
GO:0019637
0
10436
6401
85
452
oxidation-reduction process
biological_process
0.998103
0.998977
0.0
GO:0055114
0
15908
6401
85
373
base pairing
molecular_function
0.994131
0.998977
0.0
GO:0000496
0
27263
6401
85
380
transmembrane transporter activity
molecular_function
0.994687
0.998977
0.0
GO:0022857
0
16757
6401
85
387
regulation of catalytic activity
biological_process
0.995191
0.998977
0.0
GO:0050790
0
25303
6401
85
475
transporter activity
molecular_function
0.998638
0.998977
0.0
GO:0005215
0
21385
6401
85
453
organic acid metabolic process
biological_process
0.998130
0.998977
0.0
GO:0006082
0
13085
6401
85
315
mRNA metabolic process
biological_process
0.986680
0.998977
0.0
GO:0016071
0
22505
6401
85
385
vesicle-mediated transport
biological_process
0.995052
0.998977
0.0
GO:0016192
0
3593
6401
85
498
DNA metabolic process
biological_process
0.999024
0.999024
0.0
GO:0006259
0
2948 rows × 10 columns
In [72]:
# generate html
df.dropna().head(n = 20).to_html('q2.html')
# df.head(n = 20).to_html('example.html')
In [ ]:
Content source: DavidMcDonald1993/ghsom
Similar notebooks: