Prise en main SPARQL

Exercice 1 : DESCRIBE

Lister les informations relatives à l'identifiant ENSG00000139618 (Ensembl).


In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON, XML

# URL of the remote SPARQL service that every query in this notebook is sent to.
endpoint = "http://www.ebi.ac.uk/rdf/services/ensembl/sparql"

# Single client object, reused by all the query cells below.
sparql = SPARQLWrapper(endpoint=endpoint)

In [2]:
# SPARQL PREFIX declarations shared by all queries in this notebook.
# This string is prepended to each query (`prefix + query`) before it is sent,
# so the query cells can use the short forms (ensembl:, obo:, faldo:, skos:, ...).
prefix = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX identifiers: <http://identifiers.org/>
PREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX ensembltranscript: <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX ensemblexon: <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX ensemblprotein: <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX ensemblterms: <http://rdf.ebi.ac.uk/terms/ensembl/>
"""

In [3]:
# DESCRIBE query for the resource ensembl:ENSG00000139618.
# The `ensembl:` prefix is not declared inside this string; the cell that sends
# the query prepends `prefix` to it.
query = """
DESCRIBE ensembl:ENSG00000139618
"""

In [4]:
# Configure the shared client: XML result format, and the query text with the
# prefix declarations placed in front of it.
sparql.setReturnFormat(XML)
sparql.setQuery(prefix + query)

In [5]:
# Send the configured query to the endpoint and convert the response in one call.
results = sparql.queryAndConvert()


---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
~/miniconda3/envs/jupyter/lib/python3.6/site-packages/SPARQLWrapper/Wrapper.py in _query(self)
    536         try:
--> 537             response = urlopener(request)
    538             return response, self.returnFormat

~/miniconda3/envs/jupyter/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 

~/miniconda3/envs/jupyter/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    531             meth = getattr(processor, meth_name)
--> 532             response = meth(req, response)
    533 

~/miniconda3/envs/jupyter/lib/python3.6/urllib/request.py in http_response(self, request, response)
    641             response = self.parent.error(
--> 642                 'http', request, response, code, msg, hdrs)
    643 

~/miniconda3/envs/jupyter/lib/python3.6/urllib/request.py in error(self, proto, *args)
    569             args = (dict, 'default', 'http_error_default') + orig_args
--> 570             return self._call_chain(*args)
    571 

~/miniconda3/envs/jupyter/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:

~/miniconda3/envs/jupyter/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
    649     def http_error_default(self, req, fp, code, msg, hdrs):
--> 650         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    651 

HTTPError: HTTP Error 500: Internal Server Error

During handling of the above exception, another exception occurred:

EndPointInternalError                     Traceback (most recent call last)
<ipython-input-5-f0d7a96b5cac> in <module>()
----> 1 results = sparql.query().convert()

~/miniconda3/envs/jupyter/lib/python3.6/site-packages/SPARQLWrapper/Wrapper.py in query(self)
    565             @rtype: L{QueryResult} instance
    566         """
--> 567         return QueryResult(self._query())
    568 
    569     def queryAndConvert(self):

~/miniconda3/envs/jupyter/lib/python3.6/site-packages/SPARQLWrapper/Wrapper.py in _query(self)
    543                 raise EndPointNotFound(e.read())
    544             elif e.code == 500:
--> 545                 raise EndPointInternalError(e.read())
    546             else:
    547                 raise e

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
b'virtuoso.jdbc4.VirtuosoException: Connection failed: Connection refused'

In [7]:
# Print each (subject, predicate, object) triple of the result on one
# tab-separated line.
# print() is called with a single argument so the cell runs on Python 3 (where
# the print statement is a SyntaxError) and still behaves the same on Python 2.
for subj, pred, obj in results:
    print(subj + "\t" + pred + "\t" + obj)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-c21bf3fefa04> in <module>()
      1 # print results
----> 2 for subj, pred, obj in results:
      3     print subj + "\t" + pred + "\t" + obj

NameError: name 'results' is not defined

Exercice 2 : SELECT AND FILTER

Lister les alias (noms) de ENSG00000139618.


In [8]:
# Labels and aliases of the gene ENSG00000139618.
# - ?x is restricted to the gene IRI with an equality FILTER: REGEX() operates
#   on string literals, so it cannot take an IRI variable and an IRI pattern.
# - The SKOS property is spelled skos:altLabel (IRIs are case-sensitive).
# The projected variable names (?label, ?altlabel) are the keys read by the
# cell that prints the bindings.
query = """
SELECT ?label ?altlabel
WHERE {
     ?x rdfs:label ?label.
     ?x skos:altLabel ?altlabel.
     FILTER (?x = ensembl:ENSG00000139618)
}
"""

In [9]:
# Configure the shared client: JSON result format, and the query text with the
# prefix declarations placed in front of it.
sparql.setReturnFormat(JSON)
sparql.setQuery(prefix + query)

In [10]:
# Send the configured query to the endpoint and convert the response in one call.
results = sparql.queryAndConvert()


---------------------------------------------------------------------------
EndPointInternalError                     Traceback (most recent call last)
<ipython-input-10-f0d7a96b5cac> in <module>()
----> 1 results = sparql.query().convert()

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in query(self)
    599             @rtype: L{QueryResult} instance
    600         """
--> 601         return QueryResult(self._query())
    602 
    603     def queryAndConvert(self):

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in _query(self)
    577                 raise EndPointNotFound(e.read())
    578             elif e.code == 500:
--> 579                 raise EndPointInternalError(e.read())
    580             else:
    581                 raise e

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
virtuoso.jdbc4.VirtuosoException: Connection failed: Connection refused

In [11]:
# One line per solution: label and alias, separated by a tab.
# print() with a single argument works on both Python 2 and Python 3; the
# print statement is a SyntaxError on Python 3.
for result in results["results"]["bindings"]:
    print(result['label']['value'] + "\t" + result['altlabel']['value'])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-cb87b5fe3932> in <module>()
----> 1 for result in results["results"]["bindings"]:
      2     print result['label']['value'] + "\t" + result['altlabel']['value']

NameError: name 'results' is not defined

Exercice 3 : SELECT AND VALUES

Lister les transcrits de ENSG00000139618. (2 étapes)


In [12]:
# Step 1: list the distinct predicates ?p of the triples whose object ?o is
# the gene ensembl:ENSG00000139618 (the VALUES block fixes ?o to that IRI).
query = """
SELECT DISTINCT ?p 
WHERE {
 ?s ?p ?o .
  VALUES(?o) {
    (ensembl:ENSG00000139618)
  }
}
"""

In [13]:
# Request JSON, submit the prefixed query, and convert the endpoint's response.
sparql.setReturnFormat(JSON)
sparql.setQuery(prefix + query)
results = sparql.queryAndConvert()


---------------------------------------------------------------------------
EndPointInternalError                     Traceback (most recent call last)
<ipython-input-13-45226ff4a4fc> in <module>()
      1 sparql.setQuery(prefix + query)
      2 sparql.setReturnFormat(JSON)
----> 3 results = sparql.query().convert()

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in query(self)
    599             @rtype: L{QueryResult} instance
    600         """
--> 601         return QueryResult(self._query())
    602 
    603     def queryAndConvert(self):

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in _query(self)
    577                 raise EndPointNotFound(e.read())
    578             elif e.code == 500:
--> 579                 raise EndPointInternalError(e.read())
    580             else:
    581                 raise e

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
virtuoso.jdbc4.VirtuosoException: Connection failed: Connection refused

In [14]:
# Print the value bound to ?p for each solution.
# print() with a single argument works on both Python 2 and Python 3; the
# print statement is a SyntaxError on Python 3.
for result in results["results"]["bindings"]:
    print(result['p']['value'])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-2fae7ee161eb> in <module>()
----> 1 for result in results["results"]["bindings"]:
      2     print result['p']['value']

NameError: name 'results' is not defined

In [15]:
# Step 2: select the subjects ?s linked by obo:SO_transcribed_from to the gene
# ensembl:ENSG00000139618 (the VALUES block fixes ?o to that IRI).
query = """
SELECT ?s ?o WHERE {
 ?s obo:SO_transcribed_from ?o.
  VALUES(?o) {
    (ensembl:ENSG00000139618)
  }
}
"""

In [16]:
# Request JSON, submit the prefixed query, and convert the endpoint's response.
sparql.setReturnFormat(JSON)
sparql.setQuery(prefix + query)
results = sparql.queryAndConvert()


---------------------------------------------------------------------------
EndPointInternalError                     Traceback (most recent call last)
<ipython-input-16-45226ff4a4fc> in <module>()
      1 sparql.setQuery(prefix + query)
      2 sparql.setReturnFormat(JSON)
----> 3 results = sparql.query().convert()

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in query(self)
    599             @rtype: L{QueryResult} instance
    600         """
--> 601         return QueryResult(self._query())
    602 
    603     def queryAndConvert(self):

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in _query(self)
    577                 raise EndPointNotFound(e.read())
    578             elif e.code == 500:
--> 579                 raise EndPointInternalError(e.read())
    580             else:
    581                 raise e

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
virtuoso.jdbc4.VirtuosoException: Connection failed: Connection refused

In [ ]:
# Print the values bound to ?s and ?o for each solution, on one line.
# print() with a single argument works on both Python 2 and Python 3; the
# print statement is a SyntaxError on Python 3.
for result in results["results"]["bindings"]:
    print(result['s']['value'] + "   " + result['o']['value'])

Exercice 3 bis : SELECT AND VALUES

À partir de la requête précédente, afficher les noms des transcrits (une seule requête).


In [17]:
# Same pattern as the previous exercise, with one extra triple pattern that
# retrieves the rdfs:label of each subject ?s, so a single query returns both.
query = """
SELECT ?s ?label WHERE {
 ?s obo:SO_transcribed_from ?o.
 ?s rdfs:label ?label .
  VALUES(?o) {
    (ensembl:ENSG00000139618)
  }
}
"""

In [18]:
# Request JSON, submit the prefixed query, and convert the endpoint's response.
sparql.setReturnFormat(JSON)
sparql.setQuery(prefix + query)
results = sparql.queryAndConvert()


---------------------------------------------------------------------------
EndPointInternalError                     Traceback (most recent call last)
<ipython-input-18-45226ff4a4fc> in <module>()
      1 sparql.setQuery(prefix + query)
      2 sparql.setReturnFormat(JSON)
----> 3 results = sparql.query().convert()

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in query(self)
    599             @rtype: L{QueryResult} instance
    600         """
--> 601         return QueryResult(self._query())
    602 
    603     def queryAndConvert(self):

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in _query(self)
    577                 raise EndPointNotFound(e.read())
    578             elif e.code == 500:
--> 579                 raise EndPointInternalError(e.read())
    580             else:
    581                 raise e

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
virtuoso.jdbc4.VirtuosoException: Connection failed: Connection refused

In [19]:
# Print the values bound to ?s and ?label for each solution, on one line.
# print() with a single argument works on both Python 2 and Python 3; the
# print statement is a SyntaxError on Python 3.
for result in results["results"]["bindings"]:
    print(result['s']['value'] + "   " + result['label']['value'])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-37fb62b8298e> in <module>()
----> 1 for result in results["results"]["bindings"]:
      2     print result['s']['value'] + "   " + result['label']['value']

NameError: name 'results' is not defined

Exercice 4 : FALDO

Déterminer la position de début et de fin des transcrits de ENSG00000139618 sur le génome.


In [20]:
# Start and end positions of the transcripts of the gene ENSG00000139618.
# Fixes compared with the previous version of this cell:
# - the gene identifier was mistyped (ENSG00000139168); the exercise is about
#   ENSG00000139618;
# - ?x and ?z were projected but never bound in the WHERE clause; the
#   transcript variable ?s is projected instead;
# - in FALDO, faldo:begin / faldo:end point to position nodes; the numeric
#   coordinate is carried by faldo:position on those nodes.
query = """
SELECT DISTINCT ?s ?debut ?fin WHERE {
 ?s obo:SO_transcribed_from ?o.
 ?s faldo:location ?location.
 ?location faldo:begin ?debut_node.
 ?debut_node faldo:position ?debut.
 ?location faldo:end ?fin_node.
 ?fin_node faldo:position ?fin.
  VALUES(?o) {
    (ensembl:ENSG00000139618)
  }
}
"""

In [21]:
# Request JSON, submit the prefixed query, and convert the endpoint's response.
sparql.setReturnFormat(JSON)
sparql.setQuery(prefix + query)
results = sparql.queryAndConvert()


---------------------------------------------------------------------------
EndPointInternalError                     Traceback (most recent call last)
<ipython-input-21-45226ff4a4fc> in <module>()
      1 sparql.setQuery(prefix + query)
      2 sparql.setReturnFormat(JSON)
----> 3 results = sparql.query().convert()

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in query(self)
    599             @rtype: L{QueryResult} instance
    600         """
--> 601         return QueryResult(self._query())
    602 
    603     def queryAndConvert(self):

/home/symetric/Documents/Lib/anaconda2/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.pyc in _query(self)
    577                 raise EndPointNotFound(e.read())
    578             elif e.code == 500:
--> 579                 raise EndPointInternalError(e.read())
    580             else:
    581                 raise e

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
virtuoso.jdbc4.VirtuosoException: Connection failed: Connection refused

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: