Explore Mappings (xrefs) from Uberon

Draft notebook showing the xref graph functionality of OntoBio


In [1]:
## First fetch ontology
from ontobio.ontol_factory import OntologyFactory

ofactory = OntologyFactory()
ont = ofactory.create("uberon")  ## Connect remotely to Uberon over SPARQL
##
## Note: Jupyter may show '*' to indicate kernel busy while this is being
## fetched - should only take a few seconds. Wait before proceeding

In [2]:
## select a class
[cls] = ont.search("buccal mucosa")
cls


Out[2]:
'UBERON:0006956'

In [4]:
## Get xrefs for a class
ont.xrefs(cls)


Out[4]:
['FMA:59785',
 'http://linkedlifedata.com/resource/umls/id/C1578559',
 'UMLS:C1578559',
 'http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C12505',
 'CALOHA:TS-2349',
 'http://www.snomedbrowser.com/Codes/Details/16811007',
 'BTO:0003833']

In [64]:
## Get a networkx graph object
xg = ont.xref_graph
len(xg.edges())


Out[64]:
100535

In [16]:
## Hacky convenience function to deal with URL xrefs
## TODO: use prefixcommons

def contract_xref(x):
    x = x.replace('http://linkedlifedata.com/resource/umls/id/','UMLS:')
    x = x.replace('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#', 'NCIT:')
    x = x.replace('http://www.snomedbrowser.com/Codes/Details/', 'SCTID:')
    return x

[contract_xref(x) for x in ont.xrefs(cls)]


Out[16]:
['FMA:59785',
 'UMLS:C1578559',
 'UMLS:C1578559',
 'NCIT:C12505',
 'CALOHA:TS-2349',
 'SCTID:16811007',
 'BTO:0003833']

In [54]:
## Prepare to make a DataFrame
## Each item is a class
## Each column is a database
import pandas as pd
items = []
for c in ont.nodes():
    label = ont.label(c)
    d = dict(id=c, label=label)
    items.append(d)
    for x in ont.xrefs(c):
        toks = contract_xref(x).split(":")
        if len(toks) == 2:
            d[toks[0]] = toks[1]

In [55]:
## Make dataframe
df = pd.DataFrame.from_records(items, index=['id','label'])
df=df.fillna('')
df[0:20]


Out[55]:
AAO ABA AEO AEO_RETIRED ANISEED BAMS BILA BILS BM BSA ... XtroDO ZFA ZFA_RETIRED ZFS galen http https ncithesaurus nodeID span
id label
UBERON:3010014 inguinal glands ...
UBERON:2000723 obsolete slow muscle cell somite 5 ... 0000723
UBERON:0000113 post-juvenile adult stage 0000113 ... 0000044
UBERON:0006590 remnant of embryonic structure ...
UBERON:0024559 obsolete predominantly gray regional part of habenula ...
UBERON:2001409 infraorbital 4 ... 0001409
UBERON:0005587 rhombomere 7 roof plate ... 0000951
UBERON:4300088 metapterygium bone ...
UBERON:0023317 obsolete regional part of ventral cochlear nucleus ...
UBERON:0002929 dentate gyrus pyramidal layer ...
UBERON:0000011 parasympathetic nervous system 0010488 ... 0001575 //en.wikipedia.org/wiki/Parasympathetic_nervou...
UBERON:0000325 gastric gland ...
UBERON:0010884 forelimb bone pre-cartilage condensation ...
UBERON:0002693 occipitotemporal sulcus ots Tel-Cx-OTS ... //braininfo.rprc.washington.edu/centraldirecto...
UBERON:0001536 left common carotid artery plus branches ... //en.wikipedia.org/wiki/Left_common_carotid_ar...
UBERON:0004773 superior eyelid tarsus ... //en.wikipedia.org/wiki/Tarsus_%28eyelids%29
UBERON:0009138 right common cardinal vein ...
UBERON:0001440 forelimb skeleton 0000202 ...
UBERON:0006813 nasal skeleton 0000320 ...
UBERON:0005373 spinal cord dorsal column dc ... //braininfo.rprc.washington.edu/centraldirecto...

20 rows × 98 columns


In [60]:
## Fetch sample of NCIT mappings
df['NCIT'][0:20]


Out[60]:
id              label                                                
UBERON:3010014  inguinal glands                                                
UBERON:2000723  obsolete slow muscle cell somite 5                             
UBERON:0000113  post-juvenile adult stage                                      
UBERON:0006590  remnant of embryonic structure                                 
UBERON:0024559  obsolete predominantly gray regional part of habenula          
UBERON:2001409  infraorbital 4                                                 
UBERON:0005587  rhombomere 7 roof plate                                        
UBERON:4300088  metapterygium bone                                             
UBERON:0023317  obsolete regional part of ventral cochlear nucleus             
UBERON:0002929  dentate gyrus pyramidal layer                                  
UBERON:0000011  parasympathetic nervous system                           C12764
UBERON:0000325  gastric gland                                                  
UBERON:0010884  forelimb bone pre-cartilage condensation                       
UBERON:0002693  occipitotemporal sulcus                                        
UBERON:0001536  left common carotid artery plus branches                 C32956
UBERON:0004773  superior eyelid tarsus                                         
UBERON:0009138  right common cardinal vein                                     
UBERON:0001440  forelimb skeleton                                              
UBERON:0006813  nasal skeleton                                                 
UBERON:0005373  spinal cord dorsal column                                C33355
Name: NCIT, dtype: object

In [ ]: