In [8]:
# Pull the css_styles helper from the project-local `utilities` module and call it.
# NOTE(review): presumably this injects custom CSS so the notebook renders with
# the project's styling -- confirm against utilities.css_styles.
from utilities import css_styles
css_styles()
Out[8]:
In [27]:
from pylab import *
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import random
import netCDF4
import pandas as pd
#import datetime as dt
from datetime import datetime, timedelta
from pyoos.collectors.coops.coops_sos import CoopsSos
from pyoos.collectors.nerrs.nerrs_soap import NerrsSoap
import cStringIO
import iris
import urllib2
import parser
from lxml import etree
import numpy as np
#generated for csw interface
#from fes_date_filter_formatter import fes_date_filter #date formatter (R.Signell)
import requests #required for the processing of requests
from utilities import *
from IPython.display import HTML
import folium #required for leaflet mapping
import calendar #used to get number of days in a month and year
Define space and time constraints
Kachemak Bay, because I know there's a DO sensor on a NERRS station on the Homer Spit.
In [28]:
#bounding box of interest, [lower left [lon,lat], upper right [lon,lat]]
#bounding_box_type = "box"
#bounding_box = [[-152.0,59.25],[-150.6,60.00]]
Atlantic Coast: Eric Bridger says NERACOOS has DO (dissolved oxygen) measurements (shown as yellow squares on their real-time data portal).
In [29]:
# Bounding box of interest, given as [[min_lon, min_lat], [max_lon, max_lat]]:
# the lower-left (south-west) corner followed by the upper-right (north-east)
# corner, in decimal degrees.
bounding_box_type = "box"
bounding_box = [[-72.5,41.0],[-68.5,44.0]]

# Temporal range: only this year (2014, through 13 July) is of interest.
# Both limits are stored as 'YYYY-MM-DD HH:00' strings.
start_date = datetime(2014,1,1).strftime('%Y-%m-%d %H:00')
end_date = datetime(2014,7,13).strftime('%Y-%m-%d %H:00')
time_date_range = [start_date,end_date] #start_date_end_date

# Echo the constraints. Each print receives one already-formatted argument so
# the output is identical whether print is a statement (Python 2) or a
# function (Python 3).
print(bounding_box)
print(' '.join([start_date, 'to', end_date]))
Define the web-service endpoints to check
In [30]:
# CSW (Catalogue Service for the Web) endpoint to query. Exactly one assignment
# is active; the commented-out lines are alternative catalogues that can be
# swapped in.
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal
#endpoint = 'http://www.nodc.noaa.gov/geoportal/csw' # NODC Geoportal: granule level
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw' # NODC Geoportal: collection level
#endpoint = 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw' # NRCAN CUSTOM
#endpoint = 'http://geoport.whoi.edu/gi-cat/services/cswiso' # USGS Woods Hole GI_CAT
#endpoint = 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw' # USGS CIDA Geonetwork
#endpoint = 'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw' # USGS Coastal and Marine Program
#endpoint = 'http://geoport.whoi.edu/geoportal/csw' # USGS Woods Hole Geoportal
#endpoint = 'http://geo.gov.ckan.org/csw' # CKAN testing site for new Data.gov
#endpoint = 'https://edg.epa.gov/metadata/csw' # EPA
#endpoint = 'http://cwic.csiss.gmu.edu/cwicv1/discovery' # CWIC

# Connect to the selected catalogue (timeout=60).
csw = CatalogueServiceWeb(endpoint, timeout=60)

# Walk the operations advertised by the service. Only 'GetRecords' is of
# interest; its queryables can be inspected by enabling the print below.
# As written the loop has no effect beyond iterating the operations.
for oper in csw.operations:
    if oper.name != 'GetRecords':
        continue
    #print '\nISO Queryables:\n',oper.constraints['SupportedISOQueryables']['values']
Define what possible variables we're looking for using CF standard names. I also added 'oxygen_concentration_in_sea_water' because I know that's what NERRS uses (even though it is not CF standard). Adding 'oxygen' alone does return some results via CSWs.
In [31]:
# Search vocabulary, keyed by a short variable label for ease of access.
#   "names"    -- variable names to look for in the catalogue records
#   "sos_name" -- NOTE(review): presumably the name used when querying SOS
#                 services; its use is not visible in this part of the notebook
data_dict = {
    "doxygen": {
        "names": [
            'fractional_saturation_of_oxygen_in_sea_water',
            'mass_concentration_of_oxygen_in_sea_water',
            'mole_concentration_of_dissolved_molecular_oxygen_in_sea_water',
            'moles_of_oxygen_per_unit_mass_in_sea_water',
            'volume_fraction_of_oxygen_in_sea_water',
            #'oxygen',
            'oxygen_concentration_in_sea_water',
        ],
        "sos_name": ["doxygen"],
    },
}
Set up OWSLib and its FES filter capabilities. This puts our bounding box and data_dict into a form that OWSLib can use to hit our OGC web-service endpoints.
In [32]:
def fes_date_filter(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
    """
    Build the pair of FES comparison filters that restrict records by time.

    Parameters
    ----------
    start_date, stop_date : str
        Limits of the period of interest, passed through unchanged as the
        filter literals.
    constraint : {'overlaps', 'within'}
        'overlaps' -- the record's extent must begin on or before
                      `stop_date` and end on or after `start_date`.
        'within'   -- the record's extent must begin on or after
                      `start_date` and end on or before `stop_date`.

    Returns
    -------
    tuple
        ``(start, stop)``: the filter on 'apiso:TempExtent_begin' and the
        filter on 'apiso:TempExtent_end'.

    Raises
    ------
    ValueError
        If `constraint` is not one of the supported values.  (Previously this
        case fell through to the return statement with unbound locals.)
    """
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    else:
        raise ValueError("constraint must be 'overlaps' or 'within', got %r" % (constraint,))
    return start,stop
In [33]:
# Translate the user input (dates, bounding box, search names) into FES filters.

def _any_text_like(term):
    """Return a wildcard PropertyIsLike filter matching `term` anywhere in 'apiso:AnyText'."""
    return fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % term),
                              escapeChar='\\', wildCard='*', singleChar='?')

# Temporal filters for the requested period (default 'overlaps' constraint).
start, stop = fes_date_filter(start_date, end_date)

# Flatten the two [lon, lat] corners into the four-element list given to fes.BBox.
box = [
    bounding_box[0][0], bounding_box[0][1],
    bounding_box[1][0], bounding_box[1][1],
]
bbox = fes.BBox(box)

# A record matches if ANY of the search names appears in its text.
or_filt = fes.Or([_any_text_like(val) for val in data_dict["doxygen"]["names"]])

# Exclude records whose text mentions 'Averages' (not sure if this is needed).
val = 'Averages'
not_filt = fes.Not([_any_text_like(val)])
In [34]:
# AND all constraints together: bounding box, both temporal filters, the
# "any of the search names" filter and the exclusion filter.
filter_list = [
    fes.And([bbox, start, stop, or_filt, not_filt]),
]
# Query the CSW endpoint with the combined filter and explore its properties.
# (Multiple filters can alternatively be combined with the nested-list "and"
# syntax: [[filter1, filter2]].)
# Request full records (esn='full'), at most 1000 of them.
csw.getrecords2(esn='full', maxrecords=1000, constraints=filter_list)
In [35]:
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """
    Extract the service URLs of a specific type (DAP, SOS) from records.

    Parameters
    ----------
    records : mapping
        Mapping whose values each expose a ``references`` iterable of dicts
        carrying 'scheme' and 'url' keys.
    service_string : str
        Scheme identifier a reference must carry to be selected.

    Returns
    -------
    list
        For every record, in the mapping's iteration order, the 'url' of its
        first reference whose 'scheme' equals `service_string`.  Records with
        no matching reference (or whose matching url is None) are skipped.
    """
    urls = []
    # Only the record objects are needed, so iterate over the values;
    # values() is available on both Python 2 and Python 3 mappings, whereas
    # iteritems() exists on Python 2 only.
    for rec in records.values():
        # Generator + next(): stop at the first matching reference and fall
        # back to None when the record has none.
        url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls
What's in the result set?
In [36]:
# Summarise the records returned by the catalogue query.
# Each print receives one already-formatted argument so the output is the same
# whether print is a statement (Python 2) or a function (Python 3).
print(endpoint)
print("%s %d" % ("number of datasets available: ", len(csw.records)))
# Last expression of the cell: the identifiers of the matching records,
# materialised as a list so the display is the same on Python 2 and 3.
list(csw.records.keys())
Out[36]:
In [ ]: