In [1]:
from goatools.base import get_godag
godag = get_godag("go-basic.obo")
In [2]:
# Choose a deep leaf-level GO ID associated with "bacteria"
DESC = 'bacteria' # GO Term name contains this
NSPC = 'cellular_component' # Desired namespace
# Create a chooser function which returns True or False
def chooser(goterm):
"""Choose a leaf-level GO term based on its name"""
b_match = DESC in goterm.name
# True if GO term is leaf-level (has no children)
b_leaf = not goterm.children
# True if GO term is in 'cellular_component' namespace (nspc)
b_nspc = goterm.namespace == NSPC
return b_match and b_leaf and b_nspc
# Get GO terms with desired name in desired GO DAG branch
go_ids_selected = set(o.item_id for o in godag.values() if chooser(o))
print('{N} {desc} GO terms'.format(N=len(go_ids_selected), desc=DESC))
In [3]:
from goatools.gosubdag.gosubdag import GoSubDag
gosubdag = GoSubDag(go_ids_selected, godag)
In [4]:
go_id, go_term = max(gosubdag.go2obj.items(), key=lambda t: t[1].depth)
# Print GO ID, using print format in gosubdag
print(go_id, go_term.name)
In [5]:
go_ids_chosen = go_term.get_all_parents()
print('{N} ancestors for {GO} "{name}"'.format(
N=len(go_ids_chosen), GO=go_term.item_id, name=go_term.name))
The data member, go2nt, of the class, GoSubDag, is a dict where:
Additional namedtuple fields:
| field | description |
|---|---|
| NS | Namespace: BP, MF, or CC |
| level | Minimum path from the top of the branch |
| depth | Maximum path from the top of the branch |
| GO | GO ID |
| GO_name | GO Name - A short description |
| dcnt | Descendants count |
In [6]:
# Add the deep GO ID to its list of ancestors for printing
go_ids_chosen.add(go_id)
nts = [gosubdag.go2nt[go] for go in go_ids_chosen]
In [7]:
fmt_str = '{I:2}) {NS} {GO:10} {dcnt:11} D{depth:02} {GO_name}'
# Print selected GO information
print('IDX NS GO ID Descendants Count Depth Name')
print('--- -- ---------- ----------------- ----- --------------------')
for idx, nt_go in enumerate(sorted(nts, key=lambda nt: nt.depth), 1):
print(fmt_str.format(I=idx, **nt_go._asdict()))
$ ../scripts/go_plot.py GO:1990061
In [8]:
# Short alias, indicating species and the annotation filename
abc2gpad = {
'hsa': 'goa_human.gpad', # human
'mmu': 'mgi.gpad', # mouse
'dme': 'fb.gpad' # fly
}
In [9]:
import os
for abc, gpad in abc2gpad.items():
if not os.path.exists(gpad):
fin_gz = '{FILE}.gz'.format(FILE=gpad)
wget_gz = 'current.geneontology.org/annotations/{GZ}'.format(GZ=fin_gz)
!wget $wget_gz
!gunzip $fin_gz
In [10]:
from goatools.anno.factory import get_objanno
# Load each annotation into an annotation object
abc2objanno = {s:get_objanno(f, 'gpad', godag=godag) for s, f in abc2gpad.items()}
In [11]:
from goatools.semantic import TermCounts
abc2objtcnt = {s:TermCounts(godag, o.get_id2gos_nss()) for s, o in abc2objanno.items()}
In [12]:
from goatools.semantic import get_info_content
fmt_str = ('{I:2}) {NS} {GO:10} {dcnt:4} D{depth:02} '
'{hsa:6.3f} {mmu:6.3f} {dme:6.3f} '
'{GO_name}')
# Print selected GO information
print(' |<----- tinfo --->|')
print('IDX NS GO ID dcnt Depth hsa mmu dme Name')
print('--- -- ---------- ---- ------ ------ ------ ----- --------------------')
for idx, nt_go in enumerate(sorted(nts, key=lambda nt: nt.depth), 1):
abc2tinfo = {s: get_info_content(nt_go.GO, o) for s, o in abc2objtcnt.items()}
print(fmt_str.format(I=idx, **abc2tinfo, **nt_go._asdict()))
**Note: tinfo is 0.000 for 'bacterial degradosome' in human mouse and fly because GO:1990061 is not found in the annotations downloaded May 29, 2019
In [13]:
gpad_filename = abc2gpad['hsa']
print('GPAD ANNOTATION FILENAME: {GPAD}'.format(GPAD=gpad_filename))
In [14]:
# Re-read annotation file for clarity in this tutorial
annoobj = get_objanno(gpad_filename, 'gpad', godag=godag)
# Get associations (Gene ID - to - set of GO IDs) for all namespaces (BP, MF, and CC)
id2goids = annoobj.get_id2gos_nss()
# Create TermCounts object
tcntobj = TermCounts(godag, id2goids)
# Manage a subset of GO IDs using GoSubDag
# This time, load the GO term counts
gosubdag = GoSubDag(go_ids_selected, godag, tcntobj=tcntobj)
In [15]:
# Print both descendants count and information content: dcnt and tinfo
fmt_str = '{NS} {GO:10} {dcnt:4} {tinfo:6.3f} D{depth:02} {GO_name}'
print('NS GO dcnt tinfo depth name')
print('-- ---------- ----- ------ --- -----------------------')
nts = gosubdag.prt_goids(go_ids_chosen, prtfmt=fmt_str)
Copyright (C) 2016-2019, DV Klopfenstein et al. All rights reserved