This notebook demonstrates how to query associations from IMPC, including the evidence and provenance modeled with SEPIO.

To set up:

pip install dipper jupyter ipython


In [17]:
from dipper.graph.RDFGraph import RDFGraph

# Start from an empty dipper RDFGraph
graph = RDFGraph()

# Location of the IMPC turtle file
impc_graph = "https://data.monarchinitiative.org/ttl/impc.ttl"

# Load the turtle file into the graph; this takes a minute or two
graph.parse(impc_graph, format='turtle')


Out[17]:
<Graph identifier=https://data.monarchinitiative.org/ttl/impc.ttl (<class 'rdflib.graph.Graph'>)>

In [18]:
# Count the subjects yielded for the has_phenotype (RO:0002200) predicate.
# Every yielded item is counted; nothing here de-duplicates subjects.

has_phenotype = graph._getNode("RO:0002200")
sum(1 for subject in graph.subjects(predicate=has_phenotype))


Out[18]:
19843

In [19]:
# Count the OBAN associations in the graph, i.e. the subjects whose
# rdf:type is OBAN:association
from rdflib.namespace import RDF

association = graph._getNode("OBAN:association")
sum(1 for subject in graph.subjects(RDF.type, association))


Out[19]:
19843

In [20]:
# Count the lines of mutant phenotype evidence, i.e. the subjects whose
# rdf:type is ECO:0000015

mut_pheno_evidence = graph._getNode("ECO:0000015")
sum(1 for subject in graph.subjects(RDF.type, mut_pheno_evidence))


Out[20]:
23518

Given a list of genotype labels and phenotype CURIEs, create a subgraph containing the 'has phenotype' relation and all of its evidence.


In [84]:
# Bind all namespaces in curie_map so the prefixed names below can be used in SPARQL
graph.bind_all_namespaces()

# The genotype is matched by its rdfs:label, the phenotype by its CURIE
genotype = "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)"
phenotype = "MP:0001399"

# Graph pattern: {0} is the genotype label, {1} the phenotype CURIE.
# Starting from the genotype/phenotype pair it walks to the association, its
# assertion and evidence lines, and then two levels out from the measures and
# the study attached to each evidence line.
query = """
    ?genotype rdfs:label '{0}' .
    ?genotype RO:0002200 {1} .
    
    ?assoc a ?assoc_type ;
        OBAN:association_has_object {1} ;
        OBAN:association_has_predicate RO:0002200 ;
        OBAN:association_has_subject ?genotype ;
        RO:0002558 ECO:0000015 ;
        SEPIO:0000007 ?evidenceline ;
        SEPIO:0000015 ?assertion .
    
    ?assertion SEPIO:0000018 ?creator ;
        SEPIO:0000111 ?evidenceline  .
        
    ?assertion a ?assertion_type .
        
    ?creator ?creator_predicates ?creator_objects .
    
    ?evidenceline SEPIO:0000084 ?measure1 ;
        SEPIO:0000085 ?study  .
        
    ?evidenceline a ?ev_type .
        
    ?measure1 ?measure_predicates ?measure_objects .
    ?study ?study_predicates ?study_objects .
    
    ?study_objects ?stud_p ?stud_o .
    ?measure_objects ?meas_p ?meas_o .
    
""".format(genotype, phenotype)

# The same pattern is used as both the CONSTRUCT template and the WHERE
# clause, so the triples that matched are the triples that are returned.
sparql_query = """
    CONSTRUCT {{ 
        {0}
    }}
    WHERE {{
        {0}
    }}
""".format(query)

sparql_output = graph.query(sparql_query)

# Copy the constructed triples into a fresh graph
subGraph = RDFGraph()

for triple in sparql_output:
    subGraph.add(triple)

subGraph.bind("OBAN", "http://purl.org/oban/")

# serialize() without a destination returns bytes on older rdflib releases
# and str on rdflib >= 6, so only decode when bytes actually came back.
turtle_text = subGraph.serialize(format='turtle')
if isinstance(turtle_text, bytes):
    turtle_text = turtle_text.decode("utf-8")
print(turtle_text)


@prefix OBAN: <http://purl.org/oban/> .
@prefix OBO: <http://purl.obolibrary.org/obo/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<https://monarchinitiative.org/MONARCH_64baf3a9fd766d9b45b8706afc8f6e43b8a007d0> a OBAN:association ;
    OBO:RO_0002558 OBO:ECO_0000015 ;
    OBO:SEPIO_0000007 <https://monarchinitiative.org/.well-known/genid/4c55b8c702eaf8199402e1fca28aab43664e2528>,
        <https://monarchinitiative.org/.well-known/genid/b8a67608e0359789f5a49f8cf43f2e427d80926a> ;
    OBO:SEPIO_0000015 <https://monarchinitiative.org/.well-known/genid/e0339fd4e780575854a9b15a9f53440b50aa9aa7> ;
    OBAN:association_has_object OBO:MP_0001399 ;
    OBAN:association_has_predicate OBO:RO_0002200 ;
    OBAN:association_has_subject <https://monarchinitiative.org/MONARCH_20c701bb7b16f52735288b5bf85023bd71669aca> .

<http://www.mousephenotype.org/> a <http://xmlns.com/foaf/0.1/organization> ;
    rdfs:label "International Mouse Phenotyping Consortium" .

<https://monarchinitiative.org/.well-known/genid/04e4445c9ae9486fb28c44e40ff01f877e16884a> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> .

<https://monarchinitiative.org/.well-known/genid/370c861b680c514c26890e9811e88f230b25f7ad> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> .

<https://monarchinitiative.org/.well-known/genid/52b640304090c1033aca385fd3e90dbdc18ca0cb> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> .

<https://monarchinitiative.org/.well-known/genid/5f238f0d997802a81db5d41410c9382b3f9d7bff> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> .

<https://monarchinitiative.org/.well-known/genid/dce47fd7382541f3202224331d338fb62a8564f1> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> .

<https://monarchinitiative.org/.well-known/genid/e0339fd4e780575854a9b15a9f53440b50aa9aa7> a OBO:SEPIO_0000001 ;
    OBO:SEPIO_0000018 <http://www.mousephenotype.org/> ;
    OBO:SEPIO_0000111 <https://monarchinitiative.org/.well-known/genid/4c55b8c702eaf8199402e1fca28aab43664e2528>,
        <https://monarchinitiative.org/.well-known/genid/b8a67608e0359789f5a49f8cf43f2e427d80926a> .

<https://monarchinitiative.org/.well-known/genid/e65e5afb7c929931d707b90944c49e8a5b2ef2f6> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> .

<https://monarchinitiative.org/MONARCH_20c701bb7b16f52735288b5bf85023bd71669aca> rdfs:label "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)" ;
    OBO:RO_0002200 OBO:MP_0001399 .

<https://www.mousephenotype.org/impress/parameterontologies/330/10> a <http://www.w3.org/2002/07/owl#NamedIndividual> ;
    rdfs:label "Whole arena resting time (Open-field)" .

<https://www.mousephenotype.org/impress/parameterontologies/359/11> a <http://www.w3.org/2002/07/owl#NamedIndividual> ;
    rdfs:label "Locomotor activity (Modified SHIRPA)" .

<https://www.mousephenotype.org/impress/protocol/10/2> a <http://www.w3.org/2002/07/owl#NamedIndividual> ;
    rdfs:label "Open-field" .

<https://www.mousephenotype.org/impress/protocol/11/2> a <http://www.w3.org/2002/07/owl#NamedIndividual> ;
    rdfs:label "Modified SHIRPA" .

<http://www.eumodic.org/> a <http://vivoweb.org/ontology/core#Project> ;
    rdfs:label "European Mouse Disease Clinic" .

<http://www.sanger.ac.uk/> a <http://xmlns.com/foaf/0.1/organization> ;
    rdfs:label "WTSI" .

<https://monarchinitiative.org/.well-known/genid/4c55b8c702eaf8199402e1fca28aab43664e2528> a OBO:ECO_0000015 ;
    OBO:SEPIO_0000084 <https://monarchinitiative.org/.well-known/genid/04e4445c9ae9486fb28c44e40ff01f877e16884a>,
        <https://monarchinitiative.org/.well-known/genid/52b640304090c1033aca385fd3e90dbdc18ca0cb>,
        <https://monarchinitiative.org/.well-known/genid/5f238f0d997802a81db5d41410c9382b3f9d7bff> ;
    OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> .

<https://monarchinitiative.org/.well-known/genid/b8a67608e0359789f5a49f8cf43f2e427d80926a> a OBO:ECO_0000015 ;
    OBO:SEPIO_0000084 <https://monarchinitiative.org/.well-known/genid/370c861b680c514c26890e9811e88f230b25f7ad>,
        <https://monarchinitiative.org/.well-known/genid/dce47fd7382541f3202224331d338fb62a8564f1>,
        <https://monarchinitiative.org/.well-known/genid/e65e5afb7c929931d707b90944c49e8a5b2ef2f6> ;
    OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> .

<https://www.mousephenotype.org/impress/procedures/2> a <http://www.w3.org/2002/07/owl#NamedIndividual> ;
    rdfs:label "EUMODIC Pipeline 2" .

<https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> a OBO:OBI_0000471 ;
    OBO:BFO_0000050 <http://www.eumodic.org/>,
        <https://www.mousephenotype.org/impress/procedures/2> ;
    OBO:BFO_0000051 OBO:STATO_0000372,
        <https://www.mousephenotype.org/impress/protocol/10/2> ;
    OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> ;
    OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/330/10> .

<https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> a OBO:OBI_0000471 ;
    OBO:BFO_0000050 <http://www.eumodic.org/>,
        <https://www.mousephenotype.org/impress/procedures/2> ;
    OBO:BFO_0000051 OBO:STATO_0000372,
        <https://www.mousephenotype.org/impress/protocol/11/2> ;
    OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> ;
    OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/359/11> .



In [91]:
# Turn the subgraph extraction into a reusable function

# SPARQL graph pattern with two str.format placeholders:
#   {0} - substituted inside the quoted rdfs:label literal
#   {1} - substituted as the object of RO:0002200 and of
#         OBAN:association_has_object
template = """
    ?genotype rdfs:label '{0}' .
    ?genotype RO:0002200 {1} .
    
    ?assoc a ?assoc_type ;
        OBAN:association_has_object {1} ;
        OBAN:association_has_predicate RO:0002200 ;
        OBAN:association_has_subject ?genotype ;
        RO:0002558 ECO:0000015 ;
        SEPIO:0000007 ?evidenceline ;
        SEPIO:0000015 ?assertion .
    
    ?assertion SEPIO:0000018 ?creator ;
        SEPIO:0000111 ?evidenceline  .
        
    ?assertion a ?assertion_type .
        
    ?creator ?creator_predicates ?creator_objects .
    
    ?evidenceline SEPIO:0000084 ?measure1 ;
        SEPIO:0000085 ?study  .
        
    ?evidenceline a ?ev_type .
        
    ?measure1 ?measure_predicates ?measure_objects .
    ?study ?study_predicates ?study_objects .
    
    ?study_objects ?stud_p ?stud_o .
    ?measure_objects ?meas_p ?meas_o .
"""

# Fresh graph that accumulates the triples of every genotype/phenotype pair
# processed below; the OBAN prefix is bound on it up front.
subGraph = RDFGraph()
subGraph.bind("OBAN", "http://purl.org/oban/")

def create_subgraph(query, graph, new_graph):
    """Copy every triple matching a graph pattern from one graph to another.

    The pattern is used as both the CONSTRUCT template and the WHERE clause
    of a SPARQL query, so the triples that match are the triples returned.

    Args:
        query: SPARQL graph pattern text.
        graph: object whose ``query(text)`` method runs the SPARQL query and
            returns an iterable of triples.
        new_graph: object whose ``add(triple)`` method receives each
            resulting triple; it is modified in place.

    Returns:
        None.
    """
    construct_wrapper = (
        "\n"
        "        CONSTRUCT {{ \n"
        "            {pattern}\n"
        "        }}\n"
        "        WHERE {{\n"
        "            {pattern}\n"
        "        }}\n"
        "        "
    )
    construct_query = construct_wrapper.format(pattern=query)

    matched_triples = graph.query(construct_query)
    for statement in matched_triples:
        new_graph.add(statement)
        
import os

# [genotype label, phenotype CURIE] pairs to extract into the subgraph
g2p_list = [
    ["Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)",
     "MP:0001399"
    ],
    ["Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (male)",
     "MP:0001399"
    ],
    ["Mapkap1<tm1b(EUCOMM)Wtsi>/Mapkap1<+> [C57BL/6N-BCM-Mapkap1 EPD0609_2_F05-B] (female)",
     "MP:0002753"
    ],
    ["Hbs1l<tm1a(KOMP)Wtsi>/Hbs1l<tm1a(KOMP)Wtsi> [C57BL/6N-WTSI-METC] (female)",
     "MP:0005292"
    ],
    ["Gnao1<tm1b(EUCOMM)Hmgu>/Gnao1<+> [C57BL/6NTac-MRC Harwell-H-GNAO1-G05-TM1B] (male)",
     "MP:0001399"
    ]
]

# Fill the template for each pair and add the matching triples to subGraph
for g2p in g2p_list:
    query = template.format(*g2p)
    create_subgraph(query, graph, subGraph)

# Write the result to the current user's home directory rather than to one
# specific user's hardcoded home path, so the cell runs on any machine.
output_path = os.path.join(os.path.expanduser("~"), "impc_test.ttl")
subGraph.serialize(output_path, format="ttl")

In [ ]: