Wikidata links entities to one another using predicates such as "subclass of" (P279). These links form a classification hierarchy, although, because the data comes from multiple sources, it may not conform to the same rules as ontology hierarchies.
OntoBio includes a Wikidata ontology factory, so we can transparently create an Ontology object from Wikidata and leverage the same methods available in ontobio.
This example focuses on anxiety disorders.
In [1]:
## Build an Ontology object from Wikidata through the ontobio factory
from ontobio.ontol_factory import OntologyFactory
f = OntologyFactory()
## OntologyFactory recognizes the prefix wdq for wikidata queries;
## we use this to make a sub-ontology
## (currently there is no lazy wrapper for WD, only an eager one, so we limit the size)
ont = f.create('wdq:Q544006') # Anxiety disorder
In [2]:
## Find terms starting with Anxiety in the sub-ontology
## NOTE(review): presumably '%' acts as a trailing wildcard in the search string -- confirm
qids = ont.search('Anxiety%')
## Bare expression: the notebook shows the search result as the cell output
qids
Out[2]:
In [3]:
## Starting from the query nodes, walk the sub-ontology in both
## directions (up and down)
nodes = ont.traverse_nodes(qids, up=True, down=True)
## Look up the label of every node reached, then preview the first 25
labels = list(map(ont.label, nodes))
labels[:25]
Out[3]:
In [16]:
## Test for cycles
import networkx as nx
g = ont.get_graph()


def show_cycle(nl):
    """Print the nodes of one cycle as a list of "<id> <label>" strings.

    nl is an iterable of node ids; each label is looked up in the
    module-level ontology ``ont``.
    """
    print(["{} {}".format(n, ont.label(n)) for n in nl])


## simple_cycles yields each elementary cycle as a list of nodes
cycles_list = list(nx.simple_cycles(g))
## Guard the acyclic case: indexing [0] on an empty list would raise
## IndexError instead of reporting that there is nothing to show
if cycles_list:
    show_cycle(cycles_list[0])
else:
    print("No cycles found")
In [5]:
from ontobio.io.ontol_renderers import GraphRenderer

## Render our extract of the sub-ontology as an ascii tree
## (note this is resilient to cycles)
## Only descend from the query nodes: including ancestors
## causes multiple paths, and a verbose display
nodes = ont.traverse_nodes(qids, down=True, up=False)
w = GraphRenderer.create('tree')
w.write_subgraph(ont, nodes, query_ids=qids)
In [6]:
## Show as graph using GraphViz
## We can do this for both descendants and ancestors
nodes = ont.traverse_nodes(qids, up=True, down=True)
## GraphRenderer is imported by the ascii-tree cell earlier in this notebook
w = GraphRenderer.create('png')
## Destination file for the rendered image
## NOTE(review): presumably the output/ directory must already exist -- confirm
w.outfile = 'output/anxiety-disorder.png'
w.write_subgraph(ont, nodes, query_ids=qids)
In [4]:
## What proteins are associated with PTSD? (via GWAS)
## The single-element unpacking requires exactly one search hit;
## zero or several matches raise ValueError
[ptsd] = ont.search('post-traumatic stress disorder')
import ontobio.sparql.wikidata as wd
## Run the canned 'disease2protein' query for the PTSD node
proteins = wd.canned_query('disease2protein', ptsd)
In [5]:
## Bare expression: the notebook shows the disease2protein result as the cell output
proteins
Out[5]:
In [10]:
from ontobio.assoc_factory import AssociationSetFactory

## Goal: GO terms for all genes/products associated with all nodes in the
## Anxiety sub-ontology
## Step one: a GO handle plus the association sets for GO (in human)
go = f.create('go')
afactory = AssociationSetFactory()
aset = afactory.create(
    ontology=go,
    subject_category='gene',
    object_category='function',
    taxon='NCBITaxon:9606',
)
In [19]:
## For each node of the sub-ontology, query its associated proteins and
## gather the annotations the association set returns for them; print the
## node followed by each annotation and its GO label
for n in ont.nodes():
    proteins = wd.canned_query('disease2protein', n)
    anns = [a for p in proteins for a in aset.annotations(p)]
    if not anns:
        ## nothing to report for this node
        continue
    print("{} {}".format(n,ont.label(n)))
    for a in anns:
        print(" {} {}".format(a, go.label(a)))
In [ ]: