In [51]:
from pql import *
from IPython.display import HTML
import requests
import urllib
import os
from metrique import pyclient
from metrique.utils import batch_gen, dt2ts
from metrique.jsonconf import JSONConf

In [ ]:
# SIMPLE DEFAULTS
# Keyword arguments handed to pyclient() when the client is created.
config = {
    'host': '127.0.0.1',
    'ssl': False,
    'ssl_verify': False,
    # Read the password from the METRIQUE_PASSWORD environment variable when it
    # is set, so a real credential never has to be saved inside the notebook.
    # Falls back to the original placeholder when the variable is absent.
    'password': os.environ.get('METRIQUE_PASSWORD', 'YOUR_PASSWORD'),
    'debug': True,
}

In [ ]:
# load the pyclient interface (load cubes, extract data, query, etc)
# `config` supplies the keyword arguments; the resulting client `m` is reused
# by the cells that follow.
m = pyclient(**config)

In [ ]:
# register user (uncomment the line below and run it once)
#m.user_register(password='YOUR_PASSWORD')  # run once

In [ ]:
# ping the server with auth=True
# NOTE(review): presumably this also checks the credentials in `config` - confirm
m.ping(auth=True)

In [ ]:
# list cubes through the client before the example cube is set up
m.cube_list_all()

In [ ]:
# Python 2 exposes urlretrieve on urllib itself; Python 3 moved it to
# urllib.request. Resolve whichever one exists so this cell runs on both.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

tmp = '/tmp'
uri = 'https://commondatastorage.googleapis.com/ckannet-storage/2012-03-03T021709/environment.csv'
saved_uri = os.path.join(tmp, os.path.basename(uri))
# cache the file locally (not absolutely necessary)
if not os.path.exists(saved_uri):
    # Download to a side file and rename only once the transfer has finished.
    # Writing straight to saved_uri would let an interrupted download leave a
    # truncated CSV behind that every later run would treat as a valid cache.
    partial_uri = saved_uri + '.part'
    urlretrieve(uri, partial_uri)
    os.rename(partial_uri, saved_uri)

# cube object obtained from the client; `csv` is what the following cells use
# to extract, index and query the data
csv = m.get_cube('csvdata_rows', name='environmental_data', batch_size=1000)

In [ ]:
#csv.cube_register()  # uncomment and run once to register the cube
m.cube_list_all()  # should now include the cube, e.g. ['USER__environmental_data'] (same name as passed to get_cube)

In [ ]:
def _oid(o):
    """Return the unique object id for a row: country name and year joined by '_'."""
    return '_'.join([o['country_name'], o['year']])


def _start(o):
    """Return the timestamp of January 1st of the row's year.

    The data is historical, so this replaces the implicit timestamps with
    one derived from the row's own 'year' value.
    """
    first_of_year = '%s-01-01' % o['year']
    return dt2ts(first_of_year)


# run the extraction over the cached file; per the original note, the result
# describes which object ids were extracted ok / failed
saved = csv.extract(uri=saved_uri, _oid=_oid, _start=_start)
# note, extracting the same data 1+ times will only save 1 version of every object (row)

In [ ]:
# request an index on the 'country_name' field of the cube
# NOTE(review): presumably this speeds up the country_name lookups below - confirm
result = csv.cube_index('country_name')

In [ ]:
# time a sample query restricted to the 'country_name' field; result is kept in `df`
%time df = csv.query_sample(fields='country_name')

In [ ]:
# time fetching the cube's sample fields; result is kept in `fields`
# NOTE(review): presumably the field names seen in a sample of objects - confirm
%time fields = csv.cube_sample_fields()

In [ ]:
# time fetching the distinct values of 'country_name'; result is kept in `countries`
%time countries = csv.distinct('country_name')

In [ ]:
# fields to fetch, sent to find() as one comma-separated string
wanted_fields = ', '.join([
    'country_name',
    'population_in_largest_city',
    'population_density_(people_per_sq_km_of_land_area)',
])
z = csv.find(fields=wanted_fields)

# keep only the United States rows and plot largest-city population against _start
is_us = z.country_name == 'United States'
z[is_us].plot(x='_start', y='population_in_largest_city')

In [ ]:
# United States rows of `z` again, this time plotting population density against _start
z[z.country_name == 'United States'].plot(x='_start', y='population_density_(people_per_sq_km_of_land_area)')

In [ ]: