http://www.wikiwatershed-vs.org examples:
In [1]:
import json
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
import datetime
import time
import calendar
import pytz
#from matplotlib.dates import date2num, num2date
# Module-level alias for pytz's UTC tzinfo object.
utc_tz = pytz.utc
def epochsec_to_dt(epochsec):
    """ Return the timezone-aware UTC datetime object for epoch seconds epochsec

    epochsec is a number of seconds since the Unix epoch. The returned
    datetime carries pytz.utc as its tzinfo.
    """
    # Build the aware datetime in a single step. utcfromtimestamp() returns a
    # naive value that then has to be patched with replace(), and it is
    # deprecated as of Python 3.12.
    return datetime.datetime.fromtimestamp(epochsec, pytz.utc)
In [3]:
def get_measurement_byvarid(metaresult, var_id):
    """ Return the first entry of metaresult['measurements'] whose 'var_id' equals var_id

    metaresult is a mapping with a 'measurements' list of dicts, each having a
    'var_id' key. Raises IndexError (naming the missing var_id) when no entry
    matches.
    """
    for measurement in metaresult['measurements']:
        # Stop at the first match instead of building the full list of matches
        if measurement['var_id'] == var_id:
            return measurement
    # Same exception type as indexing an empty match list, but with a message
    # that says which var_id was not found.
    raise IndexError("no measurement with var_id %r" % (var_id,))
In [4]:
# Endpoint used by every request in this notebook; the query parameters
# ('asset_type', 'opt', 'asset_id', ...) select what is returned.
vz_gai_url = "http://www.wikiwatershed-vs.org/services/get_asset_info.php"
In [5]:
# Request 'siso' asset metadata without an asset_id filter.
# The timeout keeps the notebook from hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here rather than as a confusing
# JSON decoding failure on the next line.
meta_r = requests.get(vz_gai_url,
                      params={'asset_type': 'siso', 'opt': 'meta'},
                      timeout=30)
meta_r.raise_for_status()
meta = meta_r.json()
In [6]:
# Show the top-level keys of the parsed response and its 'success' entry
meta.keys(), meta['success']
Out[6]:
In [7]:
# Show the type of the 'result' payload and how many entries it holds
type(meta['result']), len(meta['result'])
Out[7]:
In [8]:
# Collect the siso_id (the station's unique identifier, a string) of every
# entry in the metadata result
siso_id_lst = [station['siso_id'] for station in meta['result']]
In [9]:
# Examine the response for the first station (index 0) in the returned list,
# starting with its siso_id
meta['result'][0]['siso_id']
Out[9]:
In [10]:
# Display the full metadata record of that first station
meta['result'][0]
Out[10]:
In [11]:
# Reduce each station's metadata to the handful of summary fields that will
# become the columns of the stations table
station_fields = ['siso_id', 'name', 'lat', 'lon', 'platform_type', 'provider']
stations_rec = [{key: sta[key] for key in station_fields}
                for sta in meta['result']]
In [12]:
# Build the stations table indexed by siso_id. verify_integrity=True makes
# set_index() raise if any siso_id is duplicated. set_index() already names
# the index after the column, so no separate index.name assignment is needed.
stations_df = pd.DataFrame.from_records(stations_rec).set_index(
    'siso_id', verify_integrity=True)
# print() with a single parenthesized argument behaves the same on Python 2
# and Python 3; the bare print statement is a SyntaxError on Python 3.
print(len(stations_df))
In [13]:
# Preview the first 10 station rows
stations_df.head(10)
Out[13]:
In [14]:
# Count stations per platform_type value
stations_df.platform_type.value_counts()
Out[14]:
In [15]:
# Count stations per provider value
stations_df.provider.value_counts()
Out[15]:
In [16]:
# Station examined in the rest of the notebook:
# USGS Schuylkill River at Philadelphia
siso_id = 'USGS_01474500'
In [17]:
# Request metadata for a single station by passing its siso_id as asset_id
# (asset_id, a more generic descriptor for the unique id of any asset).
# The timeout avoids hanging on an unresponsive server; raise_for_status()
# reports HTTP errors here instead of when the body is parsed later.
meta_r = requests.get(vz_gai_url,
                      params={'asset_type': 'siso', 'opt': 'meta',
                              'asset_id': siso_id},
                      timeout=30)
meta_r.raise_for_status()
In [18]:
# Test for success before indexing, so a failed or empty response gives a
# clear error instead of an opaque KeyError/IndexError.
# NOTE(review): assumes a falsy 'success' value signals failure -- confirm
# against the service's response format.
meta_payload = meta_r.json()
if not meta_payload.get('success') or not meta_payload.get('result'):
    raise ValueError("metadata request for asset_id %r was not successful"
                     % (siso_id,))
# use [0] to pull out the dict from the single-element list
metaresult = meta_payload['result'][0]
In [19]:
# List (var_id, depth) for every measurement of the station; var_id is the
# unique identifier for a "measurement" (or variable)
[(meas['var_id'], meas['depth']) for meas in metaresult['measurements']]
Out[19]:
In [20]:
# Display the station's name from its metadata record
metaresult['name']
Out[20]:
In [21]:
# Request the data (opt='data') for every variable (var_id='all') of the
# selected station. The timeout avoids hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
data_r = requests.get(vz_gai_url,
                      params={'asset_type': 'siso', 'opt': 'data',
                              'units_mode': 'v1',
                              'asset_id': siso_id, 'var_id': 'all'},
                      timeout=60)
data_r.raise_for_status()
data = data_r.json()
In [22]:
# Sanity-check the data response: its 'success' entry, the number of 'result'
# elements, the keys of the first element, and the length of its 'data' list
data['success'], len(data['result']), data['result'][0].keys(), len(data['result'][0]['data'])
Out[22]:
In [23]:
# Lookup table: var_id string -> position of that variable's element in the
# data['result'] list
var_ids = {entry['var_id']: idx for idx, entry in enumerate(data['result'])}
var_ids
Out[23]:
In [24]:
# Variable examined in the following cells; it is used as a key into var_ids
var_id = 'H1_Discharge'
In [25]:
# Display the station's metadata entry for the selected var_id
get_measurement_byvarid(metaresult, var_id)
Out[25]:
In [26]:
# Peek at the last 10 data records of the selected variable
data['result'][var_ids[var_id]]['data'][-10:]
Out[26]:
In [27]:
# Select the 'result' element belonging to var_id, then take its time series,
# which is a list of dicts
var_result = data['result'][var_ids[var_id]]
data_lst = var_result['data']
In [28]:
# Build a DataFrame from the list of record dicts. The cells below rely on
# the resulting 'time' and 'value' columns.
data_df = pd.DataFrame.from_records(data_lst)
In [29]:
# Preview the first rows of the raw records table
data_df.head()
Out[29]:
Create a `dtutc` column with the parsed datetime and use it as the index. Also, it's safer to rename the "value" column to something unlikely to conflict with pandas method names.
In [30]:
# Convert the epoch-seconds 'time' column to timezone-aware UTC timestamps
# with one vectorized call (instead of a per-row Python function), index the
# table by them, and rename 'value' to the variable's var_id.
# Bracket access is used for 'time' so the column name cannot be confused
# with a DataFrame attribute. The chain builds a new frame rather than
# mutating in place; set_index() names the index 'dtutc' after the column,
# and verify_integrity=True raises on duplicate timestamps.
data_df = (
    data_df
    .assign(dtutc=pd.to_datetime(data_df['time'], unit='s', utc=True))
    .set_index('dtutc', verify_integrity=True)
    .rename(columns={'value': var_id})
)
In [31]:
# Print a summary of the index, the column dtypes and the non-null counts
data_df.info()
In [32]:
# Preview the first rows after indexing by dtutc and renaming the value column
data_df.head()
Out[32]:
In [33]:
# Summary statistics of the table's numeric columns
data_df.describe()
Out[33]:
In [34]:
# Plot the selected variable's time series. The title combines the variable's
# name, its var_id and the station name; the y axis is labelled with the
# variable's units.
var_info = get_measurement_byvarid(metaresult, var_id)
title = "%s (%s) at %s" % (var_info['name'], var_id, metaresult['name'])
# Label the Axes returned by plot() directly rather than going through
# pyplot's "current axes" state
ax = data_df[var_id].plot(title=title, figsize=[11, 5])
ax.set_ylabel(var_info['units']);