Presentation

This notebook focuses on writing SPARQL queries for relevant BioPAX patterns.

Endpoint

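PathwayCommons offers a public SPARQL endpoint at http://rdf.pathwaycommons.org/sparql/. Note, however, that the queries in this notebook are sent to the Reactome SPARQL endpoint hosted at EBI (https://www.ebi.ac.uk/rdf/services/reactome/sparql), as configured in the first code cell.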


In [1]:
# URL of the SPARQL endpoint that runQuery() sends its queries to.
# NOTE(review): despite the variable name, this URL is the Reactome service
# hosted at EBI, not the PathwayCommons endpoint -- confirm which one is intended.
pathwayCommonsEndpoint = (
    "https://www.ebi.ac.uk/rdf/services/reactome/sparql"
)

In [5]:
from SPARQLWrapper import SPARQLWrapper, JSON
from IPython.display import display, Markdown 
    # for telling jupyter to display the result as markdown

def runQuery(queryString, outputFormat="tsv", varList=None, truncateAt=30):
    """Send a SPARQL query to the endpoint and display the result.

    Parameters
    ----------
    queryString : str
        Complete SPARQL query text (prefix declarations included).
    outputFormat : str
        "tsv" prints the result as tab-separated text;
        "markdown" renders it as a markdown table.
    varList : list of str, optional
        Names of the result variables to show, in display order. When
        omitted or empty, the display function chooses the columns itself.
    truncateAt : int
        Maximum displayed length of a value, forwarded to the display
        function for both output formats.

    Raises
    ------
    ValueError
        If outputFormat is not "tsv" or "markdown". The check is done
        before the query is sent, so a typo does not cost a network call.
    """
    if outputFormat not in ("tsv", "markdown"):
        raise ValueError(
            'Unsupported outputFormat %r: expected "tsv" or "markdown"'
            % (outputFormat,)
        )
    # Normalise here so the display functions always receive a list.
    varList = [] if varList is None else varList

    sparql = SPARQLWrapper(pathwayCommonsEndpoint)
    sparql.setQuery(queryString)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    if outputFormat == "tsv":
        # truncateAt used to be dropped on this branch, so the TSV output
        # always fell back to the display function's own default.
        displayQueryResultAsTSV(results, varList, truncateAt)
    else:
        displayQueryResultAsMarkdown(results, varList, truncateAt)

def displayQueryResultAsTSV(queryResult, varList=None, truncateAt=30):
    """Print a SPARQL JSON result as tab-separated text.

    Parameters
    ----------
    queryResult : dict
        Result in the SPARQL JSON layout; the rows are read from
        queryResult["results"]["bindings"].
    varList : list of str, optional
        Variables to print, in column order. When omitted or empty, the
        columns come from queryResult["head"]["vars"] if present, otherwise
        from the union of the variables bound in any row.
    truncateAt : int
        Maximum displayed length of each value, passed to truncateString.

    Prints "Empty result" when there is no row. Otherwise prints one header
    line followed by one line per row; a variable that is unbound in a row
    yields an empty field.
    """
    bindings = queryResult["results"]["bindings"]
    if len(bindings) == 0:
        print("Empty result")
        return
    if not varList:
        # The first row alone is not a reliable source of column names:
        # a variable left unbound there (e.g. from OPTIONAL) would be lost.
        varList = queryResult.get("head", {}).get("vars")
        if not varList:
            # No header available: collect names over all rows, first-seen order.
            varList = list(dict.fromkeys(
                varName for row in bindings for varName in row
            ))
    lines = ["\t".join(varList)]
    for row in bindings:
        lines.append("\t".join(
            truncateString(row[varName]["value"], truncateAt) if varName in row else ""
            for varName in varList
        ))
    # Every line is newline-terminated and print() adds one more newline,
    # which matches the layout this function has always produced.
    print("".join(line + "\n" for line in lines))

def displayQueryResultAsMarkdown(queryResult, varList=None, truncateAt=30):
    """Render a SPARQL JSON result as a markdown table in the notebook.

    Parameters
    ----------
    queryResult : dict
        Result in the SPARQL JSON layout; the rows are read from
        queryResult["results"]["bindings"].
    varList : list of str, optional
        Variables to show, in column order. When omitted or empty, the
        columns come from queryResult["head"]["vars"] if present, otherwise
        from the union of the variables bound in any row.
    truncateAt : int
        Maximum displayed length of each value, passed to truncateString.

    Prints "Empty result" when there is no row. Otherwise builds the table
    text and hands it to display(Markdown(...)); a variable that is unbound
    in a row yields an empty cell.
    """
    bindings = queryResult["results"]["bindings"]
    if len(bindings) == 0:
        print("Empty result")
        return
    if not varList:
        # The first row alone is not a reliable source of column names:
        # a variable left unbound there (e.g. from OPTIONAL) would be lost.
        varList = queryResult.get("head", {}).get("vars")
        if not varList:
            # No header available: collect names over all rows, first-seen order.
            varList = list(dict.fromkeys(
                varName for row in bindings for varName in row
            ))
    headerLine = "".join(" | " + varName for varName in varList)
    sepLine = "| ---" * len(varList)
    tableLines = [headerLine, sepLine]
    for row in bindings:
        cells = []
        for varName in varList:
            if varName in row:
                text = truncateString(row[varName]["value"], truncateAt)
                # A raw pipe would start a new column and a line break would
                # end the table row, so neutralise both inside the cell.
                text = " ".join(text.splitlines()).replace("|", "\\|")
                cells.append("| " + text + " ")
            else:
                cells.append("|  ")
        tableLines.append("".join(cells) + " ")
    display(Markdown("".join(line + "\n" for line in tableLines)))

def truncateString(message, length=30):
    """Shorten message to at most length characters, eliding the middle.

    Parameters
    ----------
    message : str
        Text to shorten.
    length : int
        Maximum length of the returned string. A negative value (-1 by
        convention) disables truncation.

    Returns
    -------
    str
        message unchanged when it already fits; otherwise the start and
        the end of message joined by "...". With the default length of 30
        this keeps the first 15 and the last 12 characters.
    """
    if length < 0 or len(message) <= length:
        return message
    ellipsis = "..."
    kept = length - len(ellipsis)
    if kept <= 0:
        # No room for the ellipsis plus any content: plain cut.
        return message[:length]
    # Same head/tail proportion as the original 15/12 split of 27 characters.
    tailLength = kept * 4 // 9
    headLength = kept - tailLength
    # Slice from an explicit start index: message[-0:] would return everything.
    return message[:headLength] + ellipsis + message[len(message) - tailLength:]

In [6]:
# Shared SPARQL PREFIX declarations. This string is concatenated in front of a
# query body before the query is sent, so the body can use short names such as
# rdf: and bp3: instead of full IRIs.
# NOTE(review): the release: IRI looks tied to one specific Reactome BioPAX
# release -- confirm it matches the data served by the endpoint in use.
commonPrefixes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#>
PREFIX taxon: <http://identifiers.org/taxonomy/>
PREFIX reactome: <http://identifiers.org/reactome/>
PREFIX release: <http://www.reactome.org/biopax/49/48887#>

PREFIX up: <http://purl.uniprot.org/core/> 
PREFIX uniprot: <http://purl.uniprot.org/uniprot/>

PREFIX chebi: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX obo2: <http://purl.obolibrary.org/obo#>

"""

In [7]:
# Count the distinct resources typed as bp3:Pathway.
queryNbPathways = """
#SELECT (count(DISTINCT ?pathway) as ?nbPathways)
SELECT (count(DISTINCT ?pathway) as ?nbPathways)

WHERE {
  ?pathway rdf:type bp3:Pathway .
}
"""

# The prefixes are prepended so that rdf: and bp3: resolve in the query body.
runQuery(
    commonPrefixes + queryNbPathways,
    outputFormat="markdown",
    varList=[],
)


nbPathways
21195

In [ ]: