This notebook currently illustrates only a request for a single station ("feature") given its known station_code (nesdis_id); no other type of collection query is illustrated. It then shows how to access data from the specified station, extract the station metadata, and convert the returned multi-variable time series to a pandas DataFrame, followed by a time series plot from the DataFrame. 2014 June 18. Emilio Mayorga.
In [1]:
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from pyoos.collectors.hads.hads import Hads
In [2]:
# FROM pyoos SOS handling
# Convenience function to build record style time series representation
# Test for string NaN added for HADS collector. Ideally the change should be
# made in pyoos, on the HADS parser so 'values' returns floats or np.nan
def flatten_element(p):
    """Build a record-style dict for one time step of a station.

    Parameters
    ----------
    p : object
        Must expose ``time`` and an iterable ``members``; each member is a
        mapping with at least the keys ``'standard'`` and ``'value'``.

    Returns
    -------
    dict
        ``{'time': p.time, <standard name>: <float value>, ...}``. Values
        that are missing or cannot be parsed as a number (``None``, ``''``,
        arbitrary text) are stored as ``np.nan`` so that the resulting
        DataFrame columns stay numeric.
    """
    rd = {'time': p.time}
    for m in p.members:
        try:
            # float() already turns the string 'NaN' into a float nan, so no
            # separate string comparison is needed for the HADS collector.
            rd[m['standard']] = float(m['value'])
        except (TypeError, ValueError):
            rd[m['standard']] = np.nan
    return rd
def obsprops_bystdname(sta):
    """Return the station's unique members (observed properties) keyed by standard name.

    ``sta.get_unique_members()`` serves the same function as the pyoos SOS
    ``get_unique_members`` method. Each returned member is stored under its
    ``'standard'`` entry; if two members share a standard name, the later
    one replaces the earlier one.
    """
    members_by_name = {}
    for member in sta.get_unique_members():
        members_by_name[member['standard']] = member
    return members_by_name
In [3]:
# Not used, for now
#states_url = "http://amazon.nws.noaa.gov/hads/charts/OR.html"
In [4]:
# Create the pyoos HADS collector that every later cell operates on.
hadsData = Hads()
In [5]:
# Access South Slough North Spit station, for the last 2 days (roughly)
#hadsData.filter(features=['346F229A'],
# start=datetime.utcnow() - timedelta(days=2),
# end=datetime.utcnow() - timedelta(hours=12))
In [6]:
#hadsData.filter(features=['346F229A'], states_url=states_url)
#hadsData.filter(states_url=states_url)
# NOT CLEAR YET IF THIS FILTER IS ALSO NEEDED,
# OR IF SETTING hadsData.station_codes IS ENOUGH
# Restrict the collector to the single station (feature) 346F229A.
hadsData.filter(features=['346F229A'])
Out[6]:
In [7]:
# Also set the station code list directly on the collector; whether this or
# the filter() call alone would be sufficient is still an open question.
hadsData.station_codes = ['346F229A']
In [8]:
# Display the three URL attributes currently set on the collector.
hadsData.metadata_url, hadsData.obs_retrieval_url, hadsData.states_url
Out[8]:
In [9]:
# Run the collection for the configured station and keep the response for
# inspection below. NOTE(review): presumably performs the remote HADS
# requests -- confirm against pyoos.
respCollect = hadsData.collect()
In [10]:
# Number of elements in the response (a single station was requested).
len(respCollect.elements)
Out[10]:
In [11]:
# Use the first element of the response as the station of interest, and
# index its unique members (observed properties) by standard name.
sta = respCollect.elements[0]
obsprops_bystdname_dict = obsprops_bystdname(sta)
In [12]:
# Print the station location, then display the station's raw properties.
# print() with a single argument behaves the same under Python 2 and 3,
# whereas the bare print statement is a syntax error under Python 3.
print(sta.get_location())
sta._properties
Out[12]:
In [13]:
# List the available standard names and show the full member entry stored
# under 'dissolved_oxygen'.
obsprops_bystdname_dict.keys(), obsprops_bystdname_dict['dissolved_oxygen']
Out[13]:
In [14]:
# Flatten every element of the station into a record dict, build a DataFrame
# indexed by the 'time' field, and preview its first rows.
flattenedsta_0 = [flatten_element(elem) for elem in sta.elements]
sta_0df = pd.DataFrame.from_records(flattenedsta_0, index=['time'])
sta_0df.head()
Out[14]:
In [15]:
#hadsData._get_metadata(['346F229A'])
# ---------------------------------
#respMeta, respRawdata = hadsData.raw()
#respMeta
#Out[]: u'|346F229A|SSNO3|NERRS WATER QUALITY SITE AT SOUTH SLOUGH NORTH SPIT NEAR NORTH BEND 2WNW (CTCLUSI)|43 24 50|-124 16 44|MFR|OR|NOAERD|SU|34 |000930|60|TW|15,-9|0.018,-9|32.0|9|0.0|0.0|WC|15,-9|0.01,-9|0.0|9|0.0|0.0|WS|15,-9|0.01,-9|0.0|9|0.0|0.0|WX|15,-9|0.01,-9|0.0|9|0.0|0.0|WO|15,-9|0.01,-9|0.0|9|0.0|0.0|HM|15,-9|0.03281,-9|0.0|9|0.0|0.0|WP|15,-9|0.01,-9|0.0|9|0.0|0.0|WT|15,-9|0.01,-9|0.0|9|0.0|0.0|VB|15,-9|0.01,-9|0.0|9|0.0|0.0|\r\n'
In [16]:
# Time series plot: choose the observed property to plot and look up its
# member entry (its 'unit' is used for the y-axis label when plotting).
obsprop_name = 'sea_water_temperature'
obsprop = obsprops_bystdname_dict[obsprop_name]
#sta_0df[obsprop_name].plot()
#ylabel(obsprop_name + ' ('+obsprop['unit']+')');
# THE VARIABLE COLUMNS WERE NOT NUMERIC (PROBABLY DUE TO THE 'NaN' values),
# SO .plot() RETURNED AN ERROR.
# 6/18/2014. I've fixed flatten_element() to handle this, and it now works!
In [17]:
# (rows, columns) of the flattened station DataFrame.
sta_0df.shape
Out[17]:
In [18]:
# Per-column dtypes; the value columns are expected to be numeric because
# flatten_element stores floats / np.nan.
sta_0df.dtypes
Out[18]:
In [19]:
# Preview the last rows of the DataFrame.
sta_0df.tail()
Out[19]:
In [20]:
# Number of elements held by the station, and the type of the first one.
len(sta.elements), type(sta.elements[0])
Out[20]:
In [21]:
# Inspect the raw members of the station's first element (the same structure
# flatten_element reads 'standard' and 'value' from).
stael0 = sta.elements[0]
stael0.members
Out[21]:
In [22]:
# Plot the selected observed property as a time series. Series.plot() returns
# the matplotlib Axes, so the y axis is labelled through it rather than via a
# pylab-style global ylabel(), which this file never imports or defines.
# The trailing semicolon suppresses the notebook's echo of the return value.
ax = sta_0df[obsprop_name].plot()
ax.set_ylabel(obsprop_name + ' ('+obsprop['unit']+')');