In [1]:
%matplotlib inline
import pytz
import matplotlib.pyplot as plt
import pandas as pd
import ulmo
from ulmo.util import convert_datetime
In [2]:
print(ulmo.cuahsi.wof.__doc__)
In [3]:
print([obj for obj in dir(ulmo.cuahsi.wof) if not obj.startswith('__')])
In [4]:
# WaterML/WOF WSDL endpoint; passed as the first argument to the
# ulmo.cuahsi.wof calls in the cells below.
# NOTE(review): this is a raw IP address over plain HTTP -- presumably a
# temporary test server; confirm it is still reachable before re-running.
wsdlurl = 'http://54.186.36.247:8080/mysqlodm2timeseries/soap/cuahsi_1_0/.wsdl' # WOF 1.0
# 'network code': joined with ':' in front of the site and variable codes to
# build the identifiers sent to the service (e.g. networkcd + ':' + sitecd).
networkcd = 'mysqlodm2timeseries'
The site used below is one of the two sites in the LBR sample DB.
In [5]:
sitecd = 'USU-LBR-Mendon'
In [6]:
siteinfo = ulmo.cuahsi.wof.get_site_info(wsdlurl, networkcd+':'+sitecd)
In [7]:
type(siteinfo), siteinfo.keys()
Out[7]:
In [8]:
siteinfo['network'], siteinfo['code'], siteinfo['name']
Out[8]:
In [9]:
print(siteinfo['location'])
In [10]:
type(siteinfo['series']), len(siteinfo['series']), siteinfo['series'].keys()
Out[10]:
In [11]:
siteinfo['series']['mysqlodm2timeseries:USU33'].keys()
Out[11]:
In [12]:
siteinfo['series']['mysqlodm2timeseries:USU33']
Out[12]:
In [13]:
def site_series_values_to_df(series_values, variable_name):
    """Convert a WOF ``get_values`` result into a time-indexed DataFrame.

    Parameters
    ----------
    series_values : dict
        Mapping with a ``'values'`` entry: a sequence of dicts, each holding a
        ``'datetime'`` entry (parsed with ``convert_datetime``) and a
        ``'value'`` entry (converted with ``float``).
    variable_name : str
        Name given to the single data column of the returned frame.

    Returns
    -------
    pandas.DataFrame
        One column named ``variable_name``, indexed by ``'time'`` and sorted
        in ascending time order. If ``series_values['values']`` is empty, an
        empty frame with the same column and index name is returned instead
        of raising.

    Raises
    ------
    KeyError
        If ``'values'``, ``'datetime'`` or ``'value'`` is missing.
    ValueError
        If a value cannot be converted to ``float``.
    """
    values = series_values['values']

    # zip(*[]) yields nothing to unpack, so an empty series needs its own path.
    if not values:
        return pd.DataFrame(
            columns=[variable_name],
            index=pd.DatetimeIndex([], tz='UTC', name='time'),
            dtype=float,
        )

    # Build a clean list of (timestamp, value) tuples.
    # NOTE(review): replace(tzinfo=...) only labels the naive timestamp as UTC;
    # it does not shift it. If the service reports local time, the index is
    # offset from true UTC -- confirm against the service's time offset.
    tsdt_tuplst = [
        (convert_datetime(valdict['datetime']).replace(tzinfo=pytz.utc),
         float(valdict['value']))
        for valdict in values
    ]
    dt, val = zip(*tsdt_tuplst)

    # Chain instead of mutating in place so re-running the cell is idempotent.
    return (
        pd.DataFrame({'time': dt, variable_name: val})
        .set_index('time')
        .sort_index(ascending=True)
    )
In [14]:
print(
ulmo.cuahsi.wof.get_values.__doc__.replace('<', '').replace('>', '')
)
`'mysqlodm2timeseries:USU33'` is 'Oxygen, dissolved percent of saturation'.
In [15]:
# Variable code within the network; combined with the network code below.
variablecd = 'USU33'
# Request the series for this site and variable; both identifiers are
# '<network code>:<code>' strings.
site_values = ulmo.cuahsi.wof.get_values(
    wsdlurl,
    networkcd + ':' + sitecd,
    networkcd + ':' + variablecd,
)
In [16]:
site_values.keys()
Out[16]:
In [17]:
sitevariable = site_values['variable']
sitevariable
Out[17]:
`site_values['values']` is a list of individual time series values (each with a timestamp and a data value).
In [18]:
type(site_values['values']), site_values['values'][0].keys()
Out[18]:
Start and end timestamps (local time, with a time offset relative to UTC; ISO 8601 format).
In [19]:
site_values['values'][0]['datetime'], site_values['values'][-1]['datetime']
Out[19]:
Set a nice, user-friendly variable name string.
In [20]:
variable_name = '%s (%s)' % (sitevariable['name'], sitevariable['value_type'])
variable_name
Out[20]:
In [21]:
dtstr_last = site_values['values'][-1]['datetime']
convert_datetime(dtstr_last).replace(tzinfo=pytz.utc)
Out[21]:
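Note that the more natural conversion fails here:
convert_datetime(dtstr_last).astimezone(pytz.utc)
ValueError: astimezone() cannot be applied to a naive datetime
`convert_datetime` returns a naive datetime (no tzinfo), so `replace(tzinfo=pytz.utc)` is used instead. Be aware that `replace` only labels the timestamp as UTC; it does not shift a local time by its offset.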
In [22]:
ts_df = site_series_values_to_df(site_values, variable_name)
ts_df.tail()
Out[22]:
In [23]:
type(ts_df), ts_df.columns, ts_df.index.dtype, ts_df.index.min(), ts_df.index.max()
Out[23]:
In [24]:
# Draw on an explicit Axes so the title and labels are set on a known object.
fig, ax = plt.subplots(figsize=(10, 4))
# The frame holds a single data column; its name doubles as the axis label.
varlabel = ts_df.columns[0]
ts_df[varlabel].plot(style='-', ax=ax)
# Title with the site name so the figure stands on its own when skimmed.
ax.set_title(siteinfo['name'])
# Trailing ';' suppresses the Text repr in the cell output.
ax.set_ylabel(varlabel + ', ' + sitevariable['units']['abbreviation']);