This notebook demonstrates how to query associations from IMPC, including evidence and provenance modeled with SEPIO.

To set up:

pip install dipper jupyter ipython

from dipper.graph.RDFGraph import RDFGraph

# Location of the IMPC RDF file that is passed to graph.parse() below.
# NOTE(review): left as an empty string here; presumably it must be set to
# the path or URL of the IMPC Turtle file before running.
impc_graph = ""

graph = RDFGraph()

# Import the RDF file (parsed as Turtle); this takes a minute or two
graph.parse(impc_graph, format='turtle')

# How many subjects have a has_phenotype relation
# NOTE(review): only the node for the RO:0002200 CURIE is looked up here;
# no count is computed from it in the visible code.

has_phenotype = graph._getNode("RO:0002200")


# How many OBAN associations are in the graph:
# count the subjects whose rdf:type is OBAN:association.
from rdflib.namespace import RDF

association = graph._getNode("OBAN:association")
# Bare expression: the value is only displayed when run as a notebook cell.
len(list(graph.subjects(RDF.type, association)))


# How many lines of mutant phenotype evidence:
# count the subjects whose rdf:type is ECO:0000015.

mut_pheno_evidence = graph._getNode("ECO:0000015")
len(list(graph.subjects(RDF.type, mut_pheno_evidence)))


Given a list of genotype labels and phenotype IDs, create a subgraph containing the 'has phenotype' relation and all of its associated evidence.

# Bind all namespaces in curie_map for sparql
# NOTE(review): no binding code is visible after this comment. The pattern
# below uses the rdfs, RO, OBAN, ECO and SEPIO prefixes, so presumably they
# must be bound on the graph before the query is run.

# Genotype label and phenotype CURIE that are substituted into the pattern.
genotype = "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)"
phenotype = "MP:0001399"

# SPARQL graph pattern built with str.format:
#   {0} -> genotype label, inserted between single quotes, so a label that
#          itself contains a single quote would break the pattern;
#   {1} -> phenotype CURIE, inserted unquoted as a prefixed name.
# The pattern matches the genotype, its RO:0002200 link to the phenotype, the
# OBAN association between them, and the assertion, creator, evidence-line,
# measure and study nodes reachable from that association.
query = """
    ?genotype rdfs:label '{0}' .
    ?genotype RO:0002200 {1} .
    ?assoc a ?assoc_type ;
        OBAN:association_has_object {1} ;
        OBAN:association_has_predicate RO:0002200 ;
        OBAN:association_has_subject ?genotype ;
        RO:0002558 ECO:0000015 ;
        SEPIO:0000007 ?evidenceline ;
        SEPIO:0000015 ?assertion .
    ?assertion SEPIO:0000018 ?creator ;
        SEPIO:0000111 ?evidenceline  .
    ?assertion a ?assertion_type .
    ?creator ?creator_predicates ?creator_objects .
    ?evidenceline SEPIO:0000084 ?measure1 ;
        SEPIO:0000085 ?study  .
    ?evidenceline a ?ev_type .
    ?measure1 ?measure_predicates ?measure_objects .
    ?study ?study_predicates ?study_objects .
    ?study_objects ?stud_p ?stud_o .
    ?measure_objects ?meas_p ?meas_o .
""".format(genotype, phenotype)

sparql_query = """
    WHERE {{

sparql_output = graph.query(sparql_query)

subGraph = RDFGraph()

for triple in sparql_output:

subGraph.bind("OBAN", "")


@prefix OBAN: <> .
@prefix OBO: <> .
@prefix rdf: <> .
@prefix rdfs: <> .
@prefix xml: <> .
@prefix xsd: <> .

<> a OBAN:association ;
    OBO:RO_0002558 OBO:ECO_0000015 ;
    OBO:SEPIO_0000007 <>,
        <> ;
    OBO:SEPIO_0000015 <> ;
    OBAN:association_has_object OBO:MP_0001399 ;
    OBAN:association_has_predicate OBO:RO_0002200 ;
    OBAN:association_has_subject <> .

<> a <> ;
    rdfs:label "International Mouse Phenotyping Consortium" .

<> OBO:RO_0002353 <> .

<> OBO:RO_0002353 <> .

<> OBO:RO_0002353 <> .

<> OBO:RO_0002353 <> .

<> OBO:RO_0002353 <> .

<> a OBO:SEPIO_0000001 ;
    OBO:SEPIO_0000018 <> ;
    OBO:SEPIO_0000111 <>,
        <> .

<> OBO:RO_0002353 <> .

<> rdfs:label "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)" ;
    OBO:RO_0002200 OBO:MP_0001399 .

<> a <> ;
    rdfs:label "Whole arena resting time (Open-field)" .

<> a <> ;
    rdfs:label "Locomotor activity (Modified SHIRPA)" .

<> a <> ;
    rdfs:label "Open-field" .

<> a <> ;
    rdfs:label "Modified SHIRPA" .

<> a <> ;
    rdfs:label "European Mouse Disease Clinic" .

<> a <> ;
    rdfs:label "WTSI" .

<> a OBO:ECO_0000015 ;
    OBO:SEPIO_0000084 <>,
        <> ;
    OBO:SEPIO_0000085 <> .

<> a OBO:ECO_0000015 ;
    OBO:SEPIO_0000084 <>,
        <> ;
    OBO:SEPIO_0000085 <> .

<> a <> ;
    rdfs:label "EUMODIC Pipeline 2" .

<> a OBO:OBI_0000471 ;
    OBO:BFO_0000050 <>,
        <> ;
    OBO:BFO_0000051 OBO:STATO_0000372,
        <> ;
    OBO:SEPIO_0000017 <> ;
    OBO:SEPIO_0000114 <> .

<> a OBO:OBI_0000471 ;
    OBO:BFO_0000050 <>,
        <> ;
    OBO:BFO_0000051 OBO:STATO_0000372,
        <> ;
    OBO:SEPIO_0000017 <> ;
    OBO:SEPIO_0000114 <> .

# Turn this into a function

template = """
    ?genotype rdfs:label '{0}' .
    ?genotype RO:0002200 {1} .
    ?assoc a ?assoc_type ;
        OBAN:association_has_object {1} ;
        OBAN:association_has_predicate RO:0002200 ;
        OBAN:association_has_subject ?genotype ;
        RO:0002558 ECO:0000015 ;
        SEPIO:0000007 ?evidenceline ;
        SEPIO:0000015 ?assertion .
    ?assertion SEPIO:0000018 ?creator ;
        SEPIO:0000111 ?evidenceline  .
    ?assertion a ?assertion_type .
    ?creator ?creator_predicates ?creator_objects .
    ?evidenceline SEPIO:0000084 ?measure1 ;
        SEPIO:0000085 ?study  .
    ?evidenceline a ?ev_type .
    ?measure1 ?measure_predicates ?measure_objects .
    ?study ?study_predicates ?study_objects .
    ?study_objects ?stud_p ?stud_o .
    ?measure_objects ?meas_p ?meas_o .

subGraph = RDFGraph()
subGraph.bind("OBAN", "")

def create_subgraph(query, graph, new_graph):
    sparql_query = """
        CONSTRUCT {{ 
        WHERE {{

    sparql_output = graph.query(sparql_query)

    for triple in sparql_output:
g2p_list = [
    ["Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)",
    ["Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (male)",
    ["Mapkap1<tm1b(EUCOMM)Wtsi>/Mapkap1<+> [C57BL/6N-BCM-Mapkap1 EPD0609_2_F05-B] (female)",
    ["Hbs1l<tm1a(KOMP)Wtsi>/Hbs1l<tm1a(KOMP)Wtsi> [C57BL/6N-WTSI-METC] (female)",
    ["Gnao1<tm1b(EUCOMM)Hmgu>/Gnao1<+> [C57BL/6NTac-MRC Harwell-H-GNAO1-G05-TM1B] (male)",

# For each entry of g2p_list, fill the query template with its first two
# elements (used as {0} and {1}) and pass the result to create_subgraph
# together with the source graph and subGraph.
# NOTE(review): presumably create_subgraph adds the matching triples to
# subGraph; its body is not fully visible here, so confirm.
for g2p in g2p_list:
    query = template.format(g2p[0], g2p[1])
    create_subgraph(query, graph, subGraph)

# Serialize subGraph in Turtle format.
# NOTE(review): the output path is a hard-coded absolute path under one
# user's home directory; adjust it before running elsewhere.
subGraph.serialize("/home/kshefchek/impc_test.ttl", format="ttl")

