In [1]:
import pandas as pd
from io import StringIO
import grlc
import grlc.utils as utils
import grlc.swagger as swagger
We can load the grlc specification for a GitHub repository. For example, my GitHub username is c-martinez and my SPARQL queries are in the grlc-queries repository.
In [2]:
# Build the Swagger/OpenAPI spec for the SPARQL queries found in the
# given user's GitHub repository.
user = 'c-martinez'
repo = 'grlc-queries'
# spec is indexable (spec[0] is inspected in the next cells) -- presumably
# one entry per query file in the repo; confirm against grlc.swagger docs.
spec = swagger.build_spec(user, repo)
In [11]:
# Inspect the fields available on the first spec entry.
# NOTE: the original used the Python 2 print statement
# (`print spec[0].keys()`), which is a SyntaxError on Python 3;
# the call form works on both Python 2.6+ and Python 3.
print(spec[0].keys())
In [10]:
# Show the raw SPARQL query text of the first spec entry.
# NOTE: fixed the Python 2 print statement (`print spec[0]['query']`),
# which is a SyntaxError on Python 3.
print(spec[0]['query'])
We can use the dispatch_query function to load data from a specific query (dbpediaCapitals in this case). For this example, we are loading data in text/csv format.
NOTE: dbpediaCapitals query loads data from dbpedia.org -- the endpoint is specified via the endpoint decorator on the query file itself.
In [4]:
# Fetch the results of the 'dbpediaCapitals' query as CSV text.
query_name = 'dbpediaCapitals'
acceptHeader = 'text/csv'
# dispatch_query appears to return (body, status_code, headers) -- the body
# `data` is parsed as CSV in the next cell; confirm against grlc.utils docs.
data, code, headers = utils.dispatch_query(user, repo, query_name, acceptHeader=acceptHeader)
Now we just transform these results into a pandas DataFrame.
In [5]:
# Parse the CSV response body into a DataFrame and display the first rows.
data_grlc = pd.read_csv(StringIO(data))
data_grlc.head(10)
Out[5]:
In [6]:
import requests
In [7]:
# Fetch the same query result from the public grlc.io service directly
# over HTTP, requesting CSV output via content negotiation.
# NOTE: renamed the local dict (was `headers`) so it no longer clobbers
# the `headers` variable returned by utils.dispatch_query above --
# reusing the name would make re-running earlier cells misleading.
request_headers = {'accept': 'text/csv'}
resp = requests.get("http://grlc.io/api/c-martinez/grlc-queries/dbpediaCapitals", headers=request_headers)
In [8]:
# Parse the HTTP response body as CSV -- this should match the result
# obtained through the grlc library (data_grlc) above.
data_requests = pd.read_csv(StringIO(resp.text))
data_requests.head(10)
Out[8]: