Explore Mappings (xrefs) from Uberon

Draft notebook showing the xref graph functionality of OntoBio



In [1]:

    
## First fetch ontology
from ontobio.ontol_factory import OntologyFactory

ofactory = OntologyFactory()
ont = ofactory.create("uberon")  ## Connect remotely to Uberon over SPARQL
##
## Note: Jupyter may show '*' to indicate kernel busy while this is being
## fetched - should only take a few seconds. Wait before proceeding



In [2]:

    
## select a class
[cls] = ont.search("buccal mucosa")
cls









    Out[2]:





'UBERON:0006956'



In [4]:

    
## Get xrefs for a class
ont.xrefs(cls)









    Out[4]:





['FMA:59785',
 'http://linkedlifedata.com/resource/umls/id/C1578559',
 'UMLS:C1578559',
 'http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C12505',
 'CALOHA:TS-2349',
 'http://www.snomedbrowser.com/Codes/Details/16811007',
 'BTO:0003833']



In [64]:

    
## Get a networkx graph object
xg = ont.xref_graph
len(xg.edges())









    Out[64]:





100535



In [16]:

    
## Hacky convenience function to deal with URL xrefs
## TODO: use prefixcommons

def contract_xref(x):
    x = x.replace('http://linkedlifedata.com/resource/umls/id/','UMLS:')
    x = x.replace('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#', 'NCIT:')
    x = x.replace('http://www.snomedbrowser.com/Codes/Details/', 'SCTID:')
    return x

[contract_xref(x) for x in ont.xrefs(cls)]









    Out[16]:





['FMA:59785',
 'UMLS:C1578559',
 'UMLS:C1578559',
 'NCIT:C12505',
 'CALOHA:TS-2349',
 'SCTID:16811007',
 'BTO:0003833']



In [54]:

    
## Prepare to make a DataFrame
## Each item is a class
## Each column is a database
import pandas as pd
items = []
for c in ont.nodes():
    label = ont.label(c)
    d = dict(id=c, label=label)
    items.append(d)
    for x in ont.xrefs(c):
        toks = contract_xref(x).split(":")
        if len(toks) == 2:
            d[toks[0]] = toks[1]



In [55]:

    
## Make dataframe
df = pd.DataFrame.from_records(items, index=['id','label'])
df=df.fillna('')
df[0:20]









    Out[55]:







  
    
      
      
      AAO
      ABA
      AEO
      AEO_RETIRED
      ANISEED
      BAMS
      BILA
      BILS
      BM
      BSA
      ...
      XtroDO
      ZFA
      ZFA_RETIRED
      ZFS
      galen
      http
      https
      ncithesaurus
      nodeID
      span
    
    
      id
      label
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      UBERON:3010014
      inguinal glands
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:2000723
      obsolete slow muscle cell somite 5
      
      
      
      
      
      
      
      
      
      
      ...
      
      0000723
      
      
      
      
      
      
      
      
    
    
      UBERON:0000113
      post-juvenile adult stage
      
      
      
      
      
      
      
      0000113
      
      
      ...
      
      
      
      0000044
      
      
      
      
      
      
    
    
      UBERON:0006590
      remnant of embryonic structure
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0024559
      obsolete predominantly gray regional part of habenula
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:2001409
      infraorbital 4
      
      
      
      
      
      
      
      
      
      
      ...
      
      0001409
      
      
      
      
      
      
      
      
    
    
      UBERON:0005587
      rhombomere 7 roof plate
      
      
      
      
      
      
      
      
      
      
      ...
      
      0000951
      
      
      
      
      
      
      
      
    
    
      UBERON:4300088
      metapterygium bone
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0023317
      obsolete regional part of ventral cochlear nucleus
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0002929
      dentate gyrus pyramidal layer
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0000011
      parasympathetic nervous system
      0010488
      
      
      
      
      
      
      
      
      
      ...
      
      0001575
      
      
      
      //en.wikipedia.org/wiki/Parasympathetic_nervou...
      
      
      
      
    
    
      UBERON:0000325
      gastric gland
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0010884
      forelimb bone pre-cartilage condensation
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0002693
      occipitotemporal sulcus
      
      
      
      
      
      ots
      
      
      Tel-Cx-OTS
      
      ...
      
      
      
      
      
      //braininfo.rprc.washington.edu/centraldirecto...
      
      
      
      
    
    
      UBERON:0001536
      left common carotid artery plus branches
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      //en.wikipedia.org/wiki/Left_common_carotid_ar...
      
      
      
      
    
    
      UBERON:0004773
      superior eyelid tarsus
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      //en.wikipedia.org/wiki/Tarsus_%28eyelids%29
      
      
      
      
    
    
      UBERON:0009138
      right common cardinal vein
      
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0001440
      forelimb skeleton
      0000202
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0006813
      nasal skeleton
      0000320
      
      
      
      
      
      
      
      
      
      ...
      
      
      
      
      
      
      
      
      
      
    
    
      UBERON:0005373
      spinal cord dorsal column
      
      
      
      
      
      dc
      
      
      
      
      ...
      
      
      
      
      
      //braininfo.rprc.washington.edu/centraldirecto...
      
      
      
      
    
  

20 rows × 98 columns



In [60]:

    
## Fetch sample of NCIT mappings
df['NCIT'][0:20]









    Out[60]:





id              label                                                
UBERON:3010014  inguinal glands                                                
UBERON:2000723  obsolete slow muscle cell somite 5                             
UBERON:0000113  post-juvenile adult stage                                      
UBERON:0006590  remnant of embryonic structure                                 
UBERON:0024559  obsolete predominantly gray regional part of habenula          
UBERON:2001409  infraorbital 4                                                 
UBERON:0005587  rhombomere 7 roof plate                                        
UBERON:4300088  metapterygium bone                                             
UBERON:0023317  obsolete regional part of ventral cochlear nucleus             
UBERON:0002929  dentate gyrus pyramidal layer                                  
UBERON:0000011  parasympathetic nervous system                           C12764
UBERON:0000325  gastric gland                                                  
UBERON:0010884  forelimb bone pre-cartilage condensation                       
UBERON:0002693  occipitotemporal sulcus                                        
UBERON:0001536  left common carotid artery plus branches                 C32956
UBERON:0004773  superior eyelid tarsus                                         
UBERON:0009138  right common cardinal vein                                     
UBERON:0001440  forelimb skeleton                                              
UBERON:0006813  nasal skeleton                                                 
UBERON:0005373  spinal cord dorsal column                                C33355
Name: NCIT, dtype: object



In [ ]:

		AAO	BAMS	BILS	BM	...	ZFA	ZFS	http
id	label
UBERON:3010014	inguinal glands					...
UBERON:2000723	obsolete slow muscle cell somite 5					...	0000723
UBERON:0000113	post-juvenile adult stage			0000113		...		0000044
UBERON:0006590	remnant of embryonic structure					...
UBERON:0024559	obsolete predominantly gray regional part of habenula					...
UBERON:2001409	infraorbital 4					...	0001409
UBERON:0005587	rhombomere 7 roof plate					...	0000951
UBERON:4300088	metapterygium bone					...
UBERON:0023317	obsolete regional part of ventral cochlear nucleus					...
UBERON:0002929	dentate gyrus pyramidal layer					...
UBERON:0000011	parasympathetic nervous system	0010488				...	0001575		//en.wikipedia.org/wiki/Parasympathetic_nervou...
UBERON:0000325	gastric gland					...
UBERON:0010884	forelimb bone pre-cartilage condensation					...
UBERON:0002693	occipitotemporal sulcus		ots		Tel-Cx-OTS	...			//braininfo.rprc.washington.edu/centraldirecto...
UBERON:0001536	left common carotid artery plus branches					...			//en.wikipedia.org/wiki/Left_common_carotid_ar...
UBERON:0004773	superior eyelid tarsus					...			//en.wikipedia.org/wiki/Tarsus_%28eyelids%29
UBERON:0009138	right common cardinal vein					...
UBERON:0001440	forelimb skeleton	0000202				...
UBERON:0006813	nasal skeleton	0000320				...
UBERON:0005373	spinal cord dorsal column		dc			...			//braininfo.rprc.washington.edu/centraldirecto...