Dipper model API tutorial

A quick tutorial on working with the dipper models API. As input we have a dataset of variant-phenotype associations, each with a source reference, an evidence code, and a relation.

Note: to run this notebook, you must first install dipper in your virtualenv: `python3 setup.py install`


In [24]:
from dipper.graph.StreamedGraph import StreamedGraph
from dipper.graph.RDFGraph import RDFGraph
from dipper.models.Model import Model
from dipper.models.assoc.Association import Assoc as Association
import pandas as pd


# Column names for the input table: one variant-phenotype association per row,
# together with its relation, source, evidence code, and a database xref
columns = ['variant', 'variant_label', 'variant_type',
           'phenotype', 'relation', 'source', 'evidence', 'dbxref']

data = [
    ['ClinVarVariant:254143', 'C326F', 'SO:0000694',
     'HP:0000748', 'RO:0002200', 'PMID:12503095', 'ECO:0000220',
     'dbSNP:886037891']
]

# Initialize graph, here we demo the RDFGraph
# which is a subclass of RDFLib.graph()
graph = RDFGraph()

# Our model class writes to the graph and takes
# a graph object as its only instance variable
model = Model(graph)

# Create a pandas dataframe
dataframe = pd.DataFrame(data=data, columns=columns)

# NOTE: the loop variable `row` is reused by later cells, so after the loop
# it refers to the last row of the dataframe
for index, row in dataframe.iterrows():
    # Add the triple ClinVarVariant:254143 RO:0002200 HP:0000748
    # RO:0002200 is the has_phenotype relation
    # HP:0000748 is the phenotype 'Inappropriate laughter', haha
    model.addTriple(row['variant'], row['relation'], row['phenotype'])

    # The addLabel method adds a label using the rdfs:label relation
    model.addLabel(row['variant'], row['variant_label'])

    # addType makes the variant an individual of a class,
    # in this case SO:0000694 'SNP'
    model.addType(row['variant'], row['variant_type'])

    # addXref uses the relation OIO:hasDbXref
    model.addXref(row['variant'], row['dbxref'])

# Serialize the graph as turtle once, after every row has been added
# (printing inside the loop would dump the whole graph again for each row)
print(graph.serialize(format='turtle').decode("utf-8"))


@prefix ClinVarVariant: <http://www.ncbi.nlm.nih.gov/clinvar/variation/> .
@prefix HP: <http://purl.obolibrary.org/obo/HP_> .
@prefix OBO: <http://purl.obolibrary.org/obo/> .
@prefix OIO: <http://www.geneontology.org/formats/oboInOwl#> .
@prefix RO: <http://purl.obolibrary.org/obo/RO_> .
@prefix SO: <http://purl.obolibrary.org/obo/SO_> .
@prefix dbSNP: <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://www.ncbi.nlm.nih.gov/clinvar/variation/254143> a OBO:SO_0000694 ;
    rdfs:label "C326F" ;
    OBO:RO_0002200 OBO:HP_0000748 ;
    OIO:hasDbXref <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=886037891> .



In [25]:
# Compose an association for the variant/phenotype pair, then attach its
# source (publication) and evidence (ECO code).
# `row` is the row left over from the dataframe loop in the previous cell.
subject_id = row['variant']
object_id = row['phenotype']
relation_id = row['relation']

# 'test_source' ends up in the association's `definedby` attribute
association = Association(graph, 'test_source',
                          subject_id, object_id, relation_id)

association.add_source(row['source'])
association.add_evidence(row['evidence'])

# Display the attributes currently held by the association object
vars(association)


Out[25]:
{'assoc_id': None,
 'definedby': 'test_source',
 'description': None,
 'evidence': ['ECO:0000220'],
 'graph': <Graph identifier=Ne4f18e2b8550490ca9fc59667b4e8f68 (<class 'dipper.graph.RDFGraph.RDFGraph'>)>,
 'model': <dipper.models.Model.Model at 0x7f5abc55b208>,
 'obj': 'HP:0000748',
 'provenance': [],
 'rel': 'RO:0002200',
 'score': None,
 'score_type': None,
 'score_unit': None,
 'source': ['PMID:12503095'],
 'sub': 'ClinVarVariant:254143'}

In [27]:
# With the association fully composed, write it into the RDF graph.
# An association ID is generated by hashing its attributes (unless one was
# set manually), and triples linking the subject, predicate, and object are
# written using the OBAN reification model - https://github.com/EBISPOT/OBAN
association.add_association_to_graph()

# Serialize the graph as turtle. For RDFGraphs, the available
# serialization options are listed at
# http://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.graph.Graph.serialize
turtle_output = graph.serialize(format='turtle')
print(turtle_output.decode("utf-8"))


@prefix ClinVarVariant: <http://www.ncbi.nlm.nih.gov/clinvar/variation/> .
@prefix ECO: <http://purl.obolibrary.org/obo/ECO_> .
@prefix HP: <http://purl.obolibrary.org/obo/HP_> .
@prefix MONARCH: <https://monarchinitiative.org/MONARCH_> .
@prefix OBAN: <http://purl.org/oban/> .
@prefix OBO: <http://purl.obolibrary.org/obo/> .
@prefix OIO: <http://www.geneontology.org/formats/oboInOwl#> .
@prefix PMID: <http://www.ncbi.nlm.nih.gov/pubmed/> .
@prefix RO: <http://purl.obolibrary.org/obo/RO_> .
@prefix SO: <http://purl.obolibrary.org/obo/SO_> .
@prefix dbSNP: <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<https://monarchinitiative.org/MONARCH_9bb0062d3cd43958a817322404a5f0dca52cb82e> a OBAN:association ;
    OBO:RO_0002558 OBO:ECO_0000220 ;
    dc:source <http://www.ncbi.nlm.nih.gov/pubmed/12503095> ;
    OBAN:association_has_object OBO:HP_0000748 ;
    OBAN:association_has_predicate OBO:RO_0002200 ;
    OBAN:association_has_subject <http://www.ncbi.nlm.nih.gov/clinvar/variation/254143> .

<http://www.ncbi.nlm.nih.gov/clinvar/variation/254143> a OBO:SO_0000694 ;
    rdfs:label "C326F" ;
    OBO:RO_0002200 OBO:HP_0000748 ;
    OIO:hasDbXref <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=886037891> .



In [28]:
# The same graph, this time serialized as RDF/XML ('pretty-xml' format)
xml_output = graph.serialize(format='pretty-xml')
print(xml_output.decode("utf-8"))


<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
  xmlns:OIO="http://www.geneontology.org/formats/oboInOwl#"
  xmlns:OBO="http://purl.obolibrary.org/obo/"
  xmlns:OBAN="http://purl.org/oban/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>
  <OBAN:association rdf:about="https://monarchinitiative.org/MONARCH_9bb0062d3cd43958a817322404a5f0dca52cb82e">
    <OBAN:association_has_object rdf:resource="http://purl.obolibrary.org/obo/HP_0000748"/>
    <dc:source rdf:resource="http://www.ncbi.nlm.nih.gov/pubmed/12503095"/>
    <OBAN:association_has_predicate rdf:resource="http://purl.obolibrary.org/obo/RO_0002200"/>
    <OBO:RO_0002558 rdf:resource="http://purl.obolibrary.org/obo/ECO_0000220"/>
    <OBAN:association_has_subject>
      <OBO:SO_0000694 rdf:about="http://www.ncbi.nlm.nih.gov/clinvar/variation/254143">
        <rdfs:label>C326F</rdfs:label>
        <OIO:hasDbXref rdf:resource="http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=886037891"/>
        <OBO:RO_0002200 rdf:resource="http://purl.obolibrary.org/obo/HP_0000748"/>
      </OBO:SO_0000694>
    </OBAN:association_has_subject>
  </OBAN:association>
</rdf:RDF>