In [1]:
from goatools.base import download_ncbi_associations
# fin -> Filename of input file (file to be read)
# NOTE(review): presumably fetches NCBI's gene2go association file when it is
# not already available locally and returns its path -- confirm in goatools docs.
fin_gene2go = download_ncbi_associations()
In [2]:
from goatools.anno.genetogo_reader import Gene2GoReader
# Reader restricted to a single taxid, 9606 (human, going by the `_hsa` suffix).
objanno_hsa = Gene2GoReader(fin_gene2go, taxids=[9606])
In [3]:
# Second reader over the same file. taxids=True presumably loads every taxid
# present in gene2go (hence `_all`) -- TODO confirm against Gene2GoReader docs.
objanno_all = Gene2GoReader(fin_gene2go, taxids=True)
In [4]:
# This reader was built with one taxid, so no taxid argument is passed.
# The result is consumed later as {namespace: {gene: GO IDs}}.
ns2assc_hsa1 = objanno_hsa.get_ns2assc()
In [5]:
from itertools import chain
def prt_assc_counts(ns2assc):
    """Print the number of genes and GO IDs in an association.

    One line is printed per namespace, in sorted key order.

    Args:
        ns2assc: Mapping of a namespace key to a ``{gene: GO IDs}`` mapping,
            where each value is an iterable (normally a set) of GO IDs.
    """
    for nspc, gene2goids in sorted(ns2assc.items()):
        # Flatten every gene's GO IDs into one set. Unlike
        # ``set.union(*values)``, this also works when the namespace has no
        # genes at all (it reports 0 GOs instead of raising TypeError) and
        # avoids unpacking one argument per gene.
        goids = set(chain.from_iterable(gene2goids.values()))
        print("{NS} {N:6,} genes, {GOs:6,} GOs".format(
            NS=nspc, N=len(gene2goids), GOs=len(goids)))
In [6]:
# Summarize the associations from the single-taxid (9606) reader,
# one printed line per namespace.
prt_assc_counts(ns2assc_hsa1)
In [7]:
# Request the same taxid (9606) from the all-taxid reader.
# NOTE(review): the counts are presumably expected to match those printed for
# ns2assc_hsa1 -- confirm by comparing the two outputs.
ns2assc_hsa2 = objanno_all.get_ns2assc(9606)
prt_assc_counts(ns2assc_hsa2)
In [8]:
# Associations for taxid 10090 (mouse, going by the `_mmu` suffix), taken from
# the all-taxid reader.
ns2assc_mmu = objanno_all.get_ns2assc(10090)
prt_assc_counts(ns2assc_mmu)
In [9]:
# Pass a set of two taxids (9606 and 10090) in one call.
# NOTE(review): presumably the associations of both taxids are combined into
# one result -- confirm against Gene2GoReader.get_ns2assc.
ns2assc_two = objanno_all.get_ns2assc({9606, 10090})
prt_assc_counts(ns2assc_two)
In [10]:
# Pass True instead of a taxid.
# NOTE(review): presumably selects every loaded taxid, mirroring taxids=True
# used when the reader was created -- confirm in goatools docs.
ns2assc_all = objanno_all.get_ns2assc(True)
prt_assc_counts(ns2assc_all)
In [11]:
# Call with no taxid on the reader that was created with taxids=True; this
# rebinds ns2assc_all from the earlier cell.
# NOTE(review): confirm what goatools returns when no taxid is given for a
# multi-taxid reader (the result may be empty).
ns2assc_all = objanno_all.get_ns2assc()
In [12]:
# Show the raw object returned by the no-taxid call rather than a count summary.
print(ns2assc_all)