In [3]:
## We use a Factory object in the ontobio library
from ontobio import OntologyFactory
In [5]:
## Get the HPO ('hp') using the factory's default method (currently OntoBee SPARQL)
## This may take 5-10s the first time you run it; afterwards it is cached
## `ofa` is the factory; `ont` is the ontology object used by the cells below
ofa = OntologyFactory()
ont = ofa.create('hp')
In [9]:
## The OWL version of HPO (used here) has many interesting relationship types;
## for now we just care about is-a (subClassOf between named classes).
## Note: this rebinds `ont`, so everything below works on the subClassOf-only view.
ont = ont.subontology(relations='subClassOf')
In [13]:
## Locate the root of the abnormality subset by its label.
## The single-element unpacking fails loudly unless the search yields exactly one hit.
(root,) = ont.search('Phenotypic abnormality')
root
Out[13]:
In [15]:
## An arbitrary example term, looked up by label (exactly one hit expected)
(t,) = ont.search('Clinodactyly of the 3rd finger')
t
Out[15]:
In [18]:
## We use the standard python networkx library for pathfinding here.
## The graph is easily extracted from an ontology object.
## `import networkx as nx` is the documented way to obtain the `nx` alias;
## `from networkx import nx` depends on an incidental re-export that is not
## part of the documented API and may be missing in other networkx releases.
import networkx as nx
G = ont.get_graph()
G
Out[18]:
In [21]:
## Count the simple paths leading from the root down to the term
## (when mapping networkx onto an ontology, the root is the source and the descendant is the target)
sum(1 for _ in nx.all_simple_paths(G, root, t))
Out[21]:
In [22]:
## nx yields the paths one at a time; each path is a list of nodes.
## Examine the first 2 -- islice stops the search after two paths instead of
## enumerating every path and then discarding all but two.
from itertools import islice
list(islice(nx.all_simple_paths(G, root, t), 2))
Out[22]:
In [45]:
def get_pathstats(nodes, graph=None, source=None, ontology=None):
    """
    For each given node, return a table row (dict) with path statistics.

    Every simple path from ``source`` to the node is enumerated once; the
    paths are counted and the longest length is tracked on the fly, so the
    full list of paths is never held in memory.

    nodes    -- iterable of node IDs to summarise
    graph    -- graph handed to ``nx.all_simple_paths``; defaults to the
                notebook-level ``G``
    source   -- start node of every path; defaults to the notebook-level ``root``
    ontology -- object whose ``label(node)`` gives the display label;
                defaults to the notebook-level ``ont``

    Returns a list of dicts with keys 'id', 'label', 'pathcount' and
    'longest' (``len()`` of the longest path). A node with no path from
    ``source`` gets pathcount 0 and longest 0 instead of raising.
    """
    # Fall back to the notebook globals only when the caller passed nothing,
    # so existing get_pathstats(nodes) calls keep working unchanged.
    graph = G if graph is None else graph
    source = root if source is None else source
    ontology = ont if ontology is None else ontology

    items = []
    for n in nodes:
        pathcount = 0
        longest = 0
        for path in nx.all_simple_paths(graph, source, n):
            pathcount += 1
            longest = max(longest, len(path))
        items.append({'id': n,
                      'label': ontology.label(n),
                      'pathcount': pathcount,
                      'longest': longest})
    return items
## Smoke-test on the first 20 descendants of the root and peek at three rows
sample = list(ont.descendants(root))[:20]
items = get_pathstats(sample)
items[:3]
Out[45]:
In [46]:
## Look at the same table in pandas: one row per term,
## with columns coming from the dict keys built by get_pathstats
import pandas as pd
df = pd.DataFrame(items)
df
Out[46]:
In [49]:
## Basic aggregate stats (over our small sample, which may not be representative):
## mean number of root-to-term paths per term
df['pathcount'].mean()
Out[49]:
In [50]:
import plotly.plotly as py
import plotly.graph_objs as go
In [51]:
## Bar chart: one bar per term label, bar height = number of paths to that term
data = [go.Bar(x=df['label'], y=df['pathcount'])]
# IPython notebook
py.iplot(data, filename='pandas-bar-chart')
# use this in non-notebook context
# url = py.plot(data, filename='pandas-bar-chart')
Out[51]:
In [52]:
## Repeat the analysis for every descendant of the root, not just the first 20.
## Note: this rebinds `sample` and `items` from the earlier 20-term test run.
sample = list(ont.descendants(root))
items = get_pathstats(sample)
items[0:3]
Out[52]:
In [53]:
## Number of terms analysed (one row per descendant of the root)
len(items)
Out[53]:
In [54]:
## Rebuild the table from the full result set (rebinds `df` from the sample run)
df = pd.DataFrame(items)
In [55]:
## Mean number of paths per term, now over all descendants
df['pathcount'].mean()
Out[55]:
In [56]:
## Largest number of paths found for any single term
df['pathcount'].max()
Out[56]:
In [57]:
## Same bar chart as for the sample, now over every analysed term:
## one bar per term label, bar height = number of paths
data = [go.Bar(x=df['label'], y=df['pathcount'])]
# IPython notebook
py.iplot(data, filename='pandas-bar-chart-all')
Out[57]:
In [59]:
## Scatter plot: longest path length (x) against number of paths (y), one marker per term
data = [go.Scatter(x=df['longest'], y=df['pathcount'], mode='markers')]
# IPython notebook
py.iplot(data, filename='pandas-longest-vs-numpaths')
Out[59]:
In [61]:
## IDs of the term(s) whose path count equals the maximum seen in the table
max_num_paths = df['pathcount'].max()
nodes_with_max = [row['id'] for row in items if row['pathcount'] == max_num_paths]
nodes_with_max
Out[61]:
In [62]:
## Human-readable labels for the terms with the most paths
list(map(ont.label, nodes_with_max))
Out[62]:
In [70]:
## How many terms share the maximum path count
len(nodes_with_max)
Out[70]:
In [71]:
## Pick an arbitrary term from list
## (rebinds `t`, which previously held the clinodactyly example term)
t = nodes_with_max[0]
In [77]:
## Ancestors of the term (requested with reflexive=True),
## keeping only IDs that carry the 'HP:' prefix; show how many remain
ancs = [a for a in ont.ancestors(t, reflexive=True) if a.startswith('HP:')]
len(ancs)
Out[77]:
In [80]:
## Make a sub-ontology with just term and ancestors
## (`ancs` was built with reflexive=True, which presumably is what puts `t`
## itself in the list -- confirm against the ontobio docs)
subont = ont.subontology(ancs)
In [83]:
## Take one example path from the root to the chosen term.
## `t` is a term with the maximum number of paths, so pull only the first
## path from the generator rather than enumerating all of them to keep one.
sample_path = next(nx.all_simple_paths(G, root, t))
sample_path
Out[83]:
In [84]:
## Render the sub-ontology to a PNG file,
## highlighting a sample path
import os
from ontobio.io.ontol_renderers import GraphRenderer
w = GraphRenderer.create('png')
w.outfile = 'output/multipath.png'
## Nothing in this notebook creates the target directory, so make sure it
## exists before writing (a no-op when it is already there)
os.makedirs(os.path.dirname(w.outfile), exist_ok=True)
w.write(subont,query_ids=sample_path)
In [ ]: