In [1]:
import goenrich
import numpy as np
import pandas as pd
import pickle
import networkx as nx
from spearmint_ghsom import main_no_labels as ghsom_main

In [2]:
def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [29]:
import sys
from time import sleep
from __future__ import division

len = 100

for i in range(100):
    line = "[" + "=" * int(i * len / 100) + ">" + "-" * int((100 - i - 1) * len / 100) + "]"
    sys.stdout.write("\r{}".format(line))
    sys.stdout.flush()
#     print line
    sleep(0.1)
sys.stdout.write(" DONE")


[===================================================================================================>] DONE

In [4]:
import os

os.chdir("/home/david/Documents/ghsom")

G, map = load_obj('yeast_communities')
print 'num communities: {}'.format(len(map))


num communities: 5

In [5]:
import os

os.chdir("/home/david/Documents/ghsom")

dir_name = "uetz_communities"

if not os.path.isdir(dir_name):
    os.mkdir(dir_name)
    print 'made directory {}'.format(dir_name)
    
os.chdir(dir_name)

shortest_path = nx.floyd_warshall_numpy(map).astype(np.int)
np.savetxt("shortest_path.csv", shortest_path, fmt='%i', delimiter=",")
print 'written shortest path matrix'

c = 0
for n, d in map.nodes(data=True):
    ls = d['ls']
    with open('community_{}.txt'.format(c),'w') as f:
        for l in ls:
            f.write('{}\n'.format(G.node[l]['label']))
    print 'written community_{}.txt'.format(c)
    c += 1


made directory uetz_communities
written shortest path matrix
written community_0.txt
written community_1.txt
written community_2.txt
written community_3.txt
written community_4.txt

In [27]:
nx.adjacency_matrix(map).toarray().astype(np.int)


Out[27]:
array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 1, 0, 1],
       [0, 0, 0, ..., 0, 1, 0]])

In [28]:
n1, d1 = map.nodes(data=True)[1]

for n in map.neighbors(n1):
    print n, len(map.node[n]['ls'])


3 72
4 86
5 37
7 45
8 45
13 187

In [29]:
n1, d1 = map.nodes(data=True)[1]
n1, d2 = map.nodes(data=True)[8]

H1 = G.subgraph(d1['ls'])
H2 = G.subgraph(d2['ls'])

l1 = [v for k,v in nx.get_node_attributes(H1, 'label').items()]
l2 = [v for k,v in nx.get_node_attributes(H2, 'label').items()]
print l2


[u'YJL155C', u'YNR046W', u'YBR190W', u'YDL238C', u'YBL051C', u'YLR257W', u'YJR047C', u'YIL151C', u'YIR015W', u'YLR322W', u'YJR046W', u'YBL101WA', u'YDR488C', u'YLR121C', u'YLR465C', u'YLR321C', u'YIR037W', u'YHR129C', u'YLR345W', u'YJL162C', u'YLR216C', u'YLR389C', u'YBR221C', u'YDR106W', u'YML035C', u'YEL034W', u'YJL070C', u'YMR168C', u'YBR111C', u'YKR096W', u'YIL132C', u'YLR376C', u'YLR117C', u'YDR504C', u'YHR068W', u'YDL111C', u'YBR175W', u'YBR284W', u'YBR244W', u'YAR003W', u'YDR140W', u'YJR008W', u'YDR424C', u'YGR158C', u'YPL070W']

In [30]:
O = goenrich.obo.ontology('db/go-basic.obo')

annot = goenrich.read.sgd('db/gene_association.sgd.gz')
gene2go = goenrich.read.gene2go('db/gene2go.gz')
values = {k: set(v) for k,v in annot.groupby('go_id')['db_object_symbol']}

# propagate the background through the ontology
background_attribute = 'gene2go'
goenrich.enrich.propagate(O, values, background_attribute)

In [6]:
annot.loc[annot['db_object_synonym'].str.contains('YOL020W')==True]


Out[6]:
db db_object_id db_object_symbol qualifier go_id db_reference evidence_code with_from aspect db_object_name db_object_synonym db_object_type taxon date assigned_by annotation_extension gene_product_form_id
89229 SGD S000005380 TAT2 NaN GO:0016021 SGD_REF:S000124036 IEA InterPro:IPR004762 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89230 SGD S000005380 TAT2 NaN GO:0016021 SGD_REF:S000124036 IEA InterPro:IPR004840 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89231 SGD S000005380 TAT2 NaN GO:0016020 SGD_REF:S000124036 IEA InterPro:IPR002293 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89232 SGD S000005380 TAT2 NaN GO:0016020 SGD_REF:S000124036 IEA InterPro:IPR004841 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89233 SGD S000005380 TAT2 NaN GO:0015171 SGD_REF:S000124036 IEA InterPro:IPR002293 F High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89234 SGD S000005380 TAT2 NaN GO:0006810 SGD_REF:S000124036 IEA InterPro:IPR004841 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89235 SGD S000005380 TAT2 NaN GO:0003824 SGD_REF:S000124036 IEA InterPro:IPR013792 F High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89236 SGD S000005380 TAT2 NaN GO:0005886 SGD_REF:S000073805|PMID:12810702 IDA NaN C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20030908 SGD NaN NaN
89237 SGD S000005380 TAT2 NaN GO:0016021 SGD_REF:S000114049|PMID:12192589 ISM NaN C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20110531 SGD NaN NaN
89238 SGD S000005380 TAT2 NaN GO:0055085 SGD_REF:S000124036 IEA InterPro:IPR004840 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89239 SGD S000005380 TAT2 NaN GO:0055085 SGD_REF:S000124036 IEA InterPro:IPR004841 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89240 SGD S000005380 TAT2 NaN GO:0006865 SGD_REF:S000124036 IEA InterPro:IPR004762 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89241 SGD S000005380 TAT2 NaN GO:0006865 SGD_REF:S000124036 IEA InterPro:IPR004840 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89242 SGD S000005380 TAT2 NaN GO:0003333 SGD_REF:S000124036 IEA InterPro:IPR002293 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 InterPro NaN NaN
89243 SGD S000005380 TAT2 NaN GO:0015173 SGD_REF:S000043244|PMID:10654085 IDA NaN F High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20020806 SGD NaN NaN
89244 SGD S000005380 TAT2 NaN GO:0016020 SGD_REF:S000148671 IEA UniProtKB-SubCell:SL-0162 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 UniProt NaN NaN
89245 SGD S000005380 TAT2 NaN GO:0016020 SGD_REF:S000148669 IEA UniProtKB-KW:KW-0472 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 UniProt NaN NaN
89246 SGD S000005380 TAT2 NaN GO:0005783 SGD_REF:S000183046|PMID:26928762 IDA NaN C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20160603 SGD NaN NaN
89247 SGD S000005380 TAT2 NaN GO:0015827 SGD_REF:S000044967|PMID:7523855 IMP NaN P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20071128 SGD NaN NaN
89248 SGD S000005380 TAT2 NaN GO:0005300 SGD_REF:S000044967|PMID:7523855 IMP NaN F High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20071128 SGD NaN NaN
89249 SGD S000005380 TAT2 NaN GO:0006810 SGD_REF:S000148669 IEA UniProtKB-KW:KW-0813 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 UniProt NaN NaN
89250 SGD S000005380 TAT2 NaN GO:0005887 SGD_REF:S000185201 IBA PANTHER:PTN000208210 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20150304 GO_Central NaN NaN
89251 SGD S000005380 TAT2 NaN GO:0015297 SGD_REF:S000185201 IBA PANTHER:PTN000208210 F High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20150304 GO_Central NaN NaN
89252 SGD S000005380 TAT2 NaN GO:0005773 SGD_REF:S000185201 IBA PANTHER:PTN000926881 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20150304 GO_Central NaN NaN
89253 SGD S000005380 TAT2 NaN GO:0006865 SGD_REF:S000148669 IEA UniProtKB-KW:KW-0029 P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 UniProt NaN NaN
89254 SGD S000005380 TAT2 NaN GO:0016021 SGD_REF:S000148669 IEA UniProtKB-KW:KW-0812 C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20170107 UniProt NaN NaN
89255 SGD S000005380 TAT2 NaN GO:0005886 SGD_REF:S000043244|PMID:10654085 ISS NaN C High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20020806 SGD NaN NaN
89256 SGD S000005380 TAT2 NaN GO:0015801 SGD_REF:S000043244|PMID:10654085 IDA NaN P High affinity tryptophan and tyrosine permease YOL020W|LTG3|SAB2|SCM2|TAP2|aromatic amino aci... gene taxon:559292 20020806 SGD NaN NaN

In [31]:
q1 = np.array(l1)
q2 = np.array(l2)

In [32]:
for q in q1:
    print q
print
for q in q2:
    print q


YJR058C
YHR105W
YBR135W
YBR160W
YNL037C
YGL156W
YJL138C
YBR254C
YLR102C
YOR379C
YIR018W
YOR136W
YPL128C
YKR059W
YHR112C
YPL280W
YML042W
YDR421W
YOR155C
YHR113W
YNL044W
YLR021W
YJR021C
YLR347C
YFL010C
YMR096W
YDL164C
YDR027C
YML108W
YGL015C
YLR442C
YNL098C
YFL059W
YDL066W
YFL017WA
YCL033C
YIL053W
YBR176W
YMR322C
YER062C
YMR276W
YPL201C
YKL090W
YDR256C
YPL091W
YLR245C
YMR218C
YDR122W
YGL213C
YMR308C
YOR097C
YGL071W
YGR024C
YHR156C
YNL334C
YNL316C
YPL088W
YGL037C
YKL103C
YBL037W
YGL175C
YIL119C
YLR315W
YOL145C
YHL009C
YNL222W
YDR189W
YGR108W
YKR068C
YFL039C
YGR144W
YIL051C
YPL125W
YGL187C
YDR244W
YJR052W
YBR107C
YPR054W
YPR049C
YDR502C
YBL057C
YPR062W
YHR039C
YOR391C
YDR480W
YMR033W
YJR112W
YBR252W
YGR058W
YPR193C
YOR115C
YPL110C
YGR267C
YOL082W
YNL138W
YDR472W
YGR179C
YPL251W
YFL029C
YJL199C
YML077W
YMR092C
YIL082W
YPR160W
YLR377C
YNL107W
YOR380W
YFL060C
YJL104W
YJL103C
YHL018W
YOR095C
YOL054W
YDR518W
YLR319C
YNL153C
YJR010W
YLR026C
YMR095C

YJL155C
YNR046W
YBR190W
YDL238C
YBL051C
YLR257W
YJR047C
YIL151C
YIR015W
YLR322W
YJR046W
YBL101WA
YDR488C
YLR121C
YLR465C
YLR321C
YIR037W
YHR129C
YLR345W
YJL162C
YLR216C
YLR389C
YBR221C
YDR106W
YML035C
YEL034W
YJL070C
YMR168C
YBR111C
YKR096W
YIL132C
YLR376C
YLR117C
YDR504C
YHR068W
YDL111C
YBR175W
YBR284W
YBR244W
YAR003W
YDR140W
YJR008W
YDR424C
YGR158C
YPL070W

In [33]:
# for additional export to graphviz just specify the gvfile argument
# the show argument keeps the graph reasonably small
df1 = goenrich.enrich.analyze(O, q1, background_attribute)
df1 = df1.sort_values('q').dropna()
df1 = df1.loc[df1['rejected'] == 0.0]
df1 = df1.loc[df1['x'] > 0]

df2 = goenrich.enrich.analyze(O, q2, background_attribute)
df2 = df2.sort_values('q').dropna()
df2 = df2.loc[df2['rejected'] == 0.0]
df2 = df2.loc[df2['x'] > 0]

In [34]:
df1


Out[34]:
M N n name namespace p q rejected term x
29044 6448 119 5 cystathionine gamma-synthase activity molecular_function 0.003257 0.429369 0.0 GO:0003962 1
38848 6448 119 15 pteridine-containing compound biosynthetic pro... biological_process 0.030318 0.429369 0.0 GO:0042559 1
38847 6448 119 19 pteridine-containing compound metabolic process biological_process 0.047076 0.429369 0.0 GO:0042558 1
34339 6448 119 20 cysteine metabolic process biological_process 0.051689 0.429369 0.0 GO:0006534 1
33933 6448 119 12 homocysteine metabolic process biological_process 0.019754 0.429369 0.0 GO:0050667 1
18286 6448 119 10 carbon-sulfur lyase activity molecular_function 0.013797 0.429369 0.0 GO:0016846 1
3702 6448 119 5 cystathionine gamma-lyase activity molecular_function 0.003257 0.429369 0.0 GO:0004123 1
4729 6448 119 7 aryl-alcohol dehydrogenase (NAD+) activity molecular_function 0.006676 0.429369 0.0 GO:0018456 1
23198 6448 119 36 hydro-lyase activity molecular_function 0.142097 0.433662 0.0 GO:0016836 1
6869 6448 119 44 pyridoxal phosphate binding molecular_function 0.194635 0.455170 0.0 GO:0030170 1
3628 6448 119 44 sulfur amino acid biosynthetic process biological_process 0.194635 0.455170 0.0 GO:0000097 1
41812 6448 119 44 vitamin B6 binding molecular_function 0.194635 0.455170 0.0 GO:0070279 1
13438 6448 119 45 serine family amino acid metabolic process biological_process 0.201378 0.456891 0.0 GO:0009069 1
8738 6448 119 47 transferase activity, transferring alkyl or ar... molecular_function 0.214944 0.460424 0.0 GO:0016765 1
24572 6448 119 47 methionine metabolic process biological_process 0.214944 0.460424 0.0 GO:0006555 1
23197 6448 119 47 carbon-oxygen lyase activity molecular_function 0.214944 0.460424 0.0 GO:0016835 1
3629 6448 119 50 sulfur amino acid metabolic process biological_process 0.235441 0.482982 0.0 GO:0000096 1
21384 6448 119 54 cellular aldehyde metabolic process biological_process 0.262927 0.504500 0.0 GO:0006081 1
37288 6448 119 59 vitamin binding molecular_function 0.297304 0.526296 0.0 GO:0019842 1
6725 6448 119 115 lyase activity molecular_function 0.356954 0.578014 0.0 GO:0016829 2
7476 6448 119 73 prospore membrane cellular_component 0.391610 0.604663 0.0 GO:0005628 1
9027 6448 119 73 ascospore-type prospore cellular_component 0.391610 0.604663 0.0 GO:0042764 1
9032 6448 119 73 intracellular immature spore cellular_component 0.391610 0.604663 0.0 GO:0042763 1
6021 6448 119 80 oxidoreductase activity, acting on the CH-OH g... molecular_function 0.436723 0.644543 0.0 GO:0016616 1
37100 6448 119 84 aspartate family amino acid metabolic process biological_process 0.461694 0.668550 0.0 GO:0009066 1
6019 6448 119 86 oxidoreductase activity, acting on CH-OH group... molecular_function 0.473938 0.679948 0.0 GO:0016614 1
5117 6448 119 87 sulfur compound biosynthetic process biological_process 0.479998 0.682674 0.0 GO:0044272 1
11167 6448 119 99 coenzyme biosynthetic process biological_process 0.549294 0.746683 0.0 GO:0009108 1
14303 6448 119 130 alpha-amino acid biosynthetic process biological_process 0.697301 0.864811 0.0 GO:1901607 1
22978 6448 119 133 cofactor biosynthetic process biological_process 0.709276 0.873489 0.0 GO:0051188 1
18930 6448 119 138 cellular amino acid biosynthetic process biological_process 0.728362 0.890925 0.0 GO:0008652 1
20221 6448 119 147 sulfur compound metabolic process biological_process 0.760068 0.915304 0.0 GO:0006790 1
11206 6448 119 163 cofactor binding molecular_function 0.808624 0.949017 0.0 GO:0048037 1
7116 6448 119 179 coenzyme metabolic process biological_process 0.848305 0.976860 0.0 GO:0006732 1
10436 6448 119 452 oxidation-reduction process biological_process 0.998362 0.999936 0.0 GO:0055114 1
11620 6448 119 317 small molecule biosynthetic process biological_process 0.982954 0.999936 0.0 GO:0044283 1
11560 6448 119 223 cofactor metabolic process biological_process 0.922048 0.999936 0.0 GO:0051186 1
13680 6448 119 435 carboxylic acid metabolic process biological_process 0.997782 0.999936 0.0 GO:0019752 1
14301 6448 119 206 alpha-amino acid metabolic process biological_process 0.898756 0.999936 0.0 GO:1901605 1
21385 6448 119 453 organic acid metabolic process biological_process 0.998391 0.999936 0.0 GO:0006082 1
33664 6448 119 452 oxoacid metabolic process biological_process 0.998362 0.999936 0.0 GO:0043436 1
28721 6448 119 353 oxidoreductase activity molecular_function 0.990728 0.999936 0.0 GO:0016491 1
1629 6448 119 207 organic acid biosynthetic process biological_process 0.900287 0.999936 0.0 GO:0016053 1
8419 6448 119 207 carboxylic acid biosynthetic process biological_process 0.900287 0.999936 0.0 GO:0046394 1
40923 6448 119 267 cellular amino acid metabolic process biological_process 0.961190 0.999936 0.0 GO:0006520 1

In [16]:
pd.merge(df1, df2, on=['M', 'N', 'n', 'name', 'namespace', 'term'], how='inner')


Out[16]:
M N n name namespace p_x q_x rejected_x term x_x p_y q_y rejected_y x_y
0 6401 56 56 positive regulation of response to stimulus biological_process 0.085798 0.210427 0.0 GO:0048584 1 0.085798 0.211483 0.0 1
1 6401 56 58 regulation of growth biological_process 0.091096 0.211036 0.0 GO:0040008 1 0.091096 0.212521 0.0 1
2 6401 56 129 condensed chromosome cellular_component 0.102753 0.221501 0.0 GO:0000793 2 0.312062 0.413093 0.0 1
3 6401 56 69 spindle cellular_component 0.121795 0.235872 0.0 GO:0005819 1 0.121795 0.236025 0.0 1
4 6401 56 75 transcription cofactor activity molecular_function 0.139479 0.249051 0.0 GO:0003712 1 0.139479 0.249353 0.0 1
5 6401 56 151 developmental process involved in reproduction biological_process 0.145124 0.255507 0.0 GO:0003006 2 0.382425 0.476899 0.0 1
6 6401 56 255 cellular bud cellular_component 0.182871 0.287851 0.0 GO:0005933 3 0.660165 0.720536 0.0 1
7 6401 56 346 single organism reproductive process biological_process 0.183832 0.288572 0.0 GO:0044702 4 0.814314 0.851276 0.0 1
8 6401 56 269 meiotic cell cycle process biological_process 0.208022 0.310278 0.0 GO:1903046 3 0.689067 0.746278 0.0 1
9 6401 56 488 reproduction biological_process 0.252666 0.353181 0.0 GO:0000003 5 0.811360 0.848790 0.0 2
10 6401 56 111 condensed nuclear chromosome cellular_component 0.253429 0.353913 0.0 GO:0000794 1 0.253429 0.356445 0.0 1
11 6401 56 206 chromosomal region cellular_component 0.268796 0.368563 0.0 GO:0098687 2 0.542748 0.619203 0.0 1
12 6401 56 310 meiotic cell cycle biological_process 0.286589 0.383332 0.0 GO:0051321 3 0.762466 0.808252 0.0 1
13 6401 56 127 chromatin silencing biological_process 0.305569 0.403773 0.0 GO:0006342 1 0.305569 0.408164 0.0 1
14 6401 56 128 regulation of signal transduction biological_process 0.308817 0.405976 0.0 GO:0009966 1 0.308817 0.409806 0.0 1
15 6401 56 133 regulation of signaling biological_process 0.325012 0.422086 0.0 GO:0023051 1 0.325012 0.425082 0.0 1
16 6401 56 134 gene silencing biological_process 0.328241 0.424969 0.0 GO:0016458 1 0.328241 0.427598 0.0 1
17 6401 56 135 regulation of cell communication biological_process 0.331467 0.428769 0.0 GO:0010646 1 0.331467 0.431228 0.0 1
18 6401 56 135 microtubule cytoskeleton cellular_component 0.331467 0.428769 0.0 GO:0015630 1 0.331467 0.431228 0.0 1
19 6401 56 137 transcription factor activity, transcription f... molecular_function 0.337905 0.435115 0.0 GO:0000989 1 0.117517 0.231733 0.0 2
20 6401 56 235 cytoskeletal part cellular_component 0.338735 0.435115 0.0 GO:0044430 2 0.615315 0.685252 0.0 1
21 6401 56 145 negative regulation of cellular component orga... biological_process 0.363476 0.457722 0.0 GO:0051129 1 0.133034 0.245950 0.0 2
22 6401 56 248 cytoskeleton cellular_component 0.370175 0.464175 0.0 GO:0005856 2 0.644949 0.707875 0.0 1
23 6401 56 362 mitotic cell cycle process biological_process 0.391279 0.482430 0.0 GO:1903047 3 0.834003 0.868471 0.0 1
24 6401 56 155 transcription factor activity, protein binding molecular_function 0.394933 0.485400 0.0 GO:0000988 1 0.153383 0.261302 0.0 2
25 6401 56 472 reproductive process biological_process 0.397358 0.487682 0.0 GO:0022414 4 0.792598 0.833302 0.0 2
26 6401 56 157 nuclear envelope cellular_component 0.401145 0.490696 0.0 GO:0005635 1 0.401145 0.492740 0.0 1
27 6401 56 373 mitotic cell cycle biological_process 0.413449 0.503240 0.0 GO:0000278 3 0.846450 0.878639 0.0 1
28 6401 56 176 regulation of response to stimulus biological_process 0.458630 0.542553 0.0 GO:0048583 1 0.458630 0.546721 0.0 1
29 6401 56 295 cell division biological_process 0.480811 0.562919 0.0 GO:0051301 2 0.737489 0.787153 0.0 1
30 6401 56 189 mRNA binding molecular_function 0.496155 0.576873 0.0 GO:0003729 1 0.496155 0.579605 0.0 1
31 6401 56 302 developmental process biological_process 0.496607 0.576873 0.0 GO:0032502 2 0.749405 0.795838 0.0 1
32 6401 56 192 intracellular signal transduction biological_process 0.504586 0.583341 0.0 GO:0035556 1 0.504586 0.585869 0.0 1
33 6401 56 309 nuclear division biological_process 0.512174 0.589110 0.0 GO:0000280 2 0.760865 0.807368 0.0 1
34 6401 56 198 mitotic nuclear division biological_process 0.521183 0.598073 0.0 GO:0007067 1 0.521183 0.601115 0.0 1
35 6401 56 317 regulation of organelle organization biological_process 0.529668 0.605922 0.0 GO:0033043 2 0.529668 0.608519 0.0 2
36 6401 56 435 regulation of cellular component organization biological_process 0.534164 0.608938 0.0 GO:0051128 3 0.743467 0.791528 0.0 2
37 6401 56 320 organelle fission biological_process 0.536142 0.610103 0.0 GO:0048285 2 0.777979 0.822626 0.0 1
38 6401 56 351 negative regulation of biosynthetic process biological_process 0.600094 0.667576 0.0 GO:0009890 2 0.191397 0.295722 0.0 4
39 6401 56 354 negative regulation of nitrogen compound metab... biological_process 0.605982 0.673110 0.0 GO:0051172 2 0.195991 0.300049 0.0 4
40 6401 56 368 nucleolus cellular_component 0.632725 0.694495 0.0 GO:0005730 2 0.840899 0.874695 0.0 1
41 6401 56 291 signal transduction biological_process 0.730471 0.776850 0.0 GO:0007165 1 0.730471 0.780793 0.0 1
42 6401 56 291 single organism signaling biological_process 0.730471 0.776850 0.0 GO:0044700 1 0.730471 0.780793 0.0 1
43 6401 56 291 signaling biological_process 0.730471 0.776850 0.0 GO:0023052 1 0.730471 0.780793 0.0 1
44 6401 56 433 negative regulation of cellular metabolic process biological_process 0.740574 0.785046 0.0 GO:0031324 2 0.328057 0.427547 0.0 4
45 6401 56 436 chromosomal part cellular_component 0.744904 0.788785 0.0 GO:0044427 2 0.333350 0.431904 0.0 4
46 6401 56 301 nuclear chromosome part cellular_component 0.747731 0.790926 0.0 GO:0044454 1 0.747731 0.794633 0.0 1
47 6401 56 326 chromatin organization biological_process 0.786866 0.828753 0.0 GO:0006325 1 0.786866 0.830236 0.0 1
48 6401 56 331 nuclear chromosome cellular_component 0.794037 0.833328 0.0 GO:0000228 1 0.559465 0.633372 0.0 2
49 6401 56 476 chromosome cellular_component 0.797424 0.835100 0.0 GO:0005694 2 0.234963 0.338549 0.0 5
50 6401 56 487 negative regulation of macromolecule metabolic... biological_process 0.810230 0.845507 0.0 GO:0010605 2 0.424038 0.514430 0.0 4
51 6401 56 388 endoplasmic reticulum membrane cellular_component 0.862087 0.892357 0.0 GO:0005789 1 0.862087 0.891105 0.0 1
52 6401 56 398 cellular macromolecule catabolic process biological_process 0.871704 0.899469 0.0 GO:0044265 1 0.871704 0.897584 0.0 1
53 6401 56 401 nuclear outer membrane-endoplasmic reticulum m... cellular_component 0.874470 0.901692 0.0 GO:0042175 1 0.874470 0.899803 0.0 1
54 6401 56 406 cell communication biological_process 0.878960 0.904905 0.0 GO:0007154 1 0.878960 0.902071 0.0 1
55 6401 56 411 positive regulation of biosynthetic process biological_process 0.883304 0.907943 0.0 GO:0009891 1 0.707089 0.760489 0.0 2
56 6401 56 412 endoplasmic reticulum part cellular_component 0.884156 0.908502 0.0 GO:0044432 1 0.884156 0.905976 0.0 1
57 6401 56 422 positive regulation of nitrogen compound metab... biological_process 0.892370 0.915666 0.0 GO:0051173 1 0.509691 0.590169 0.0 3
58 6401 56 433 macromolecule catabolic process biological_process 0.900792 0.922701 0.0 GO:0009057 1 0.900792 0.920463 0.0 1
59 6401 56 487 positive regulation of macromolecule metabolic... biological_process 0.934048 0.948200 0.0 GO:0010604 1 0.625802 0.693938 0.0 3

In [71]:
df


Out[71]:
M N n name namespace p q rejected term x
26805 6401 85 4 monopolin complex cellular_component 0.000009 0.026402 1.0 GO:0033551 2
9772 6401 85 7 nucleolus organization biological_process 0.000076 0.112211 0.0 GO:0007000 2
23972 6401 85 9 TORC2 complex cellular_component 0.000179 0.176111 0.0 GO:0031932 2
8593 6401 85 6 transcription factor activity, RNA polymerase ... molecular_function 0.077105 0.301874 0.0 GO:0001004 0
26800 6401 85 5 histone deacetylase binding molecular_function 0.064675 0.301874 0.0 GO:0042826 0
11208 6401 85 5 positive regulation of intracellular transport biological_process 0.064675 0.301874 0.0 GO:0032388 0
11213 6401 85 28 regulation of intracellular transport biological_process 0.052713 0.301874 0.0 GO:0032386 1
26768 6401 85 8 response to oxygen levels biological_process 0.101478 0.301874 0.0 GO:0070482 0
11270 6401 85 8 protein phosphorylated amino acid binding molecular_function 0.101478 0.301874 0.0 GO:0045309 0
26702 6401 85 15 meiotic sister chromatid segregation biological_process 0.016358 0.301874 0.0 GO:0045144 1
26701 6401 85 35 homologous chromosome segregation biological_process 0.010890 0.301874 0.0 GO:0045143 2
26699 6401 85 6 meiotic telomere clustering biological_process 0.077105 0.301874 0.0 GO:0045141 0
11317 6401 85 6 transcription factor activity, core RNA polyme... molecular_function 0.077105 0.301874 0.0 GO:0001139 0
26682 6401 85 6 chromosome localization to nuclear envelope in... biological_process 0.077105 0.301874 0.0 GO:0090220 0
26607 6401 85 6 tRNA methyltransferase complex cellular_component 0.077105 0.301874 0.0 GO:0043527 0
26592 6401 85 5 fatty acid ligase activity molecular_function 0.064675 0.301874 0.0 GO:0015645 0
11376 6401 85 72 condensed chromosome, centromeric region cellular_component 0.070128 0.301874 0.0 GO:0000779 2
11379 6401 85 4 mating pheromone activity molecular_function 0.052080 0.301874 0.0 GO:0000772 0
11381 6401 85 7 adenyl-nucleotide exchange factor activity molecular_function 0.089372 0.301874 0.0 GO:0000774 0
26566 6401 85 28 calcium ion binding molecular_function 0.052713 0.301874 0.0 GO:0005509 1
11383 6401 85 73 kinetochore cellular_component 0.072459 0.301874 0.0 GO:0000776 2
11384 6401 85 66 condensed chromosome kinetochore cellular_component 0.056892 0.301874 0.0 GO:0000777 2
26542 6401 85 4 regulation of MAPK cascade involved in cell wa... biological_process 0.052080 0.301874 0.0 GO:1903137 0
11463 6401 85 6 glycolytic fermentation to ethanol biological_process 0.077105 0.301874 0.0 GO:0019655 0
11497 6401 85 5 phosphatidylinositol kinase activity molecular_function 0.064675 0.301874 0.0 GO:0052742 0
26477 6401 85 12 TOR complex cellular_component 0.000456 0.301874 0.0 GO:0038201 2
26475 6401 85 5 TORC2 signaling biological_process 0.001698 0.301874 0.0 GO:0038203 1
26447 6401 85 4 microtubule depolymerization biological_process 0.052080 0.301874 0.0 GO:0007019 0
26435 6401 85 8 Arp2/3 protein complex cellular_component 0.101478 0.301874 0.0 GO:0005885 0
26432 6401 85 7 nuclear microtubule cellular_component 0.089372 0.301874 0.0 GO:0005880 0
... ... ... ... ... ... ... ... ... ... ...
23751 6401 85 383 cellular response to chemical stimulus biological_process 0.994909 0.998977 0.0 GO:0070887 0
24269 6401 85 405 regulation of molecular function biological_process 0.996279 0.998977 0.0 GO:0065009 0
22325 6401 85 348 rRNA processing biological_process 0.991636 0.998977 0.0 GO:0006364 0
32893 6401 85 317 protein targeting biological_process 0.987050 0.998977 0.0 GO:0006605 0
17373 6401 85 327 homeostatic process biological_process 0.988751 0.998977 0.0 GO:0042592 0
5819 6401 85 368 carbohydrate derivative metabolic process biological_process 0.993700 0.998977 0.0 GO:1901135 0
5693 6401 85 417 substrate-specific transporter activity molecular_function 0.996865 0.998977 0.0 GO:0022892 0
5690 6401 85 343 substrate-specific transmembrane transporter a... molecular_function 0.991024 0.998977 0.0 GO:0022891 0
18759 6401 85 365 hydrolase activity, acting on ester bonds molecular_function 0.993426 0.998977 0.0 GO:0016788 0
28959 6401 85 396 mitochondrial membrane cellular_component 0.995769 0.998977 0.0 GO:0031966 0
35767 6401 85 467 transmembrane transport biological_process 0.998472 0.998977 0.0 GO:0055085 0
17971 6401 85 472 ribosome biogenesis biological_process 0.998578 0.998977 0.0 GO:0042254 0
41175 6401 85 327 translational elongation biological_process 0.988751 0.998977 0.0 GO:0006414 0
13680 6401 85 435 carboxylic acid metabolic process biological_process 0.997578 0.998977 0.0 GO:0019752 0
13067 6401 85 494 intracellular protein transport biological_process 0.991507 0.998977 0.0 GO:0006886 1
28721 6401 85 353 oxidoreductase activity molecular_function 0.992208 0.998977 0.0 GO:0016491 0
7504 6401 85 395 structural molecule activity molecular_function 0.995708 0.998977 0.0 GO:0005198 0
16863 6401 85 321 establishment of protein localization to organ... biological_process 0.987759 0.998977 0.0 GO:0072594 0
33664 6401 85 452 oxoacid metabolic process biological_process 0.998103 0.998977 0.0 GO:0043436 0
24838 6401 85 402 transferase complex cellular_component 0.996116 0.998977 0.0 GO:1990234 0
1634 6401 85 407 organophosphate metabolic process biological_process 0.996384 0.998977 0.0 GO:0019637 0
10436 6401 85 452 oxidation-reduction process biological_process 0.998103 0.998977 0.0 GO:0055114 0
15908 6401 85 373 base pairing molecular_function 0.994131 0.998977 0.0 GO:0000496 0
27263 6401 85 380 transmembrane transporter activity molecular_function 0.994687 0.998977 0.0 GO:0022857 0
16757 6401 85 387 regulation of catalytic activity biological_process 0.995191 0.998977 0.0 GO:0050790 0
25303 6401 85 475 transporter activity molecular_function 0.998638 0.998977 0.0 GO:0005215 0
21385 6401 85 453 organic acid metabolic process biological_process 0.998130 0.998977 0.0 GO:0006082 0
13085 6401 85 315 mRNA metabolic process biological_process 0.986680 0.998977 0.0 GO:0016071 0
22505 6401 85 385 vesicle-mediated transport biological_process 0.995052 0.998977 0.0 GO:0016192 0
3593 6401 85 498 DNA metabolic process biological_process 0.999024 0.999024 0.0 GO:0006259 0

2948 rows × 10 columns


In [72]:
# generate html
df.dropna().head(n = 20).to_html('q2.html')
# df.head(n = 20).to_html('example.html')

In [ ]: