Outline of Notebook Elements:
TODO: Figure out the different names used for the service URLs (SOS, WMS, DAP) and how to loop through all of them so that none are left out. Figure out the DataFrame (df) function and how to use it to produce tables for the assessment, and how to graph those tables. Work out some basic graphs to produce for each extreme event.
In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import csv
import re
import cStringIO
import urllib2
import parser
import pdb
from datetime import datetime
from pylab import *
from owslib.csw import CatalogueServiceWeb
from owslib.wms import WebMapService
from owslib.csw import CatalogueServiceWeb
from owslib.sos import SensorObservationService
from owslib.etree import etree
from owslib import fes
#from pyoos.utils.etree import etree
#from pyoos.parsers.ioos.one.timeseries import TimeSeries
#from pyoos.collectors.coops.coops_sos import CoopsSos
import netCDF4
# CSW catalogue endpoints queried by the cells below (the loops currently
# only visit the first two entries via endpoints[:2]).
endpoints = [
    'http://www.nodc.noaa.gov/geoportal/csw',
    'http://www.ngdc.noaa.gov/geoportal/csw',
    'http://catalog.data.gov/csw-all',
    'http://geoport.whoi.edu/geoportal/csw',
    'https://edg.epa.gov/metadata/csw',
    'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw',
    'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw',
    'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw',
]

# Keyword search terms used for record discovery.
variables = [
    'phytoplankton',
    'zooplankton',
    'fish',
    'river',
    'currents',
    'bathymetry',
    'wind',
]
In [2]:
# Probe each CSW endpoint for its protocol version, its advertised operations
# and the ISO queryables listed on its GetRecords operation.  Every list below
# receives exactly one entry per endpoint so they can be zipped into a table.
cswlist = []      # CSW version string per endpoint
oplist = []       # list of operation names per endpoint
isolist = []      # ISO queryables per endpoint, or 'Keyerror' if unavailable
lenoplist = []    # number of operations per endpoint
lenisolist = []   # number of ISO queryables per endpoint (0 if unavailable)

for endpoint in endpoints[:2]:
    csw = CatalogueServiceWeb(endpoint, timeout=60)
    cswlist.append(csw.version)

    op_names = [op.name for op in csw.operations]
    oplist.append(op_names)
    lenoplist.append(len(op_names))

    getrecords_ops = [op for op in csw.operations if op.name == 'GetRecords']
    try:
        queryables = getrecords_ops[0].constraints['SupportedISOQueryables']['values']
    except (IndexError, KeyError):
        # IndexError: the endpoint advertises no GetRecords operation.
        # KeyError: GetRecords exists but lists no ISO queryables.
        queryables = None

    if queryables is None:
        # Keep the placeholder text, but do not count its characters as
        # queryables (len('Keyerror') would otherwise report 8).
        isolist.append('Keyerror')
        lenisolist.append(0)
    else:
        isolist.append(queryables)
        lenisolist.append(len(queryables))
In [3]:
# Combine the per-endpoint capability lists into one table, one row per
# endpoint (zip stops at the shortest list), and display it.
CWSLIST = zip(endpoints, cswlist, oplist, lenoplist, isolist, lenisolist)
df = DataFrame(
    data=CWSLIST,
    columns=[
        'endpoint',
        'CSW Version',
        'Operations',
        '#of Operations',
        'ISO_Queryables',
        '#isoqueryeables',
    ],
)
print(df)
In [4]:
# IOOS Core Biological Variables include fish, phytoplankton and zooplankton.
# Searching a registry for these keywords returns the titles of a wide range
# of records.  This is a first step in discovery; refining the data sets for
# a particular time period and geography will be the next step.
# Variable search terms:
#   phytoplankton
#   zooplankton
#   fish
#   river flow rates
#   sea surface currents (velocity vectors)
#   bathymetry
#   bottom type
#   wind
variables = ['phytoplankton', 'zooplankton', 'fish', 'river', 'currents', 'bathymetry', 'wind']
variables1 = []
records2 = []      # per endpoint: list of search result summaries
titles2 = []       # per endpoint: list of record titles
lenrecords1 = []   # per endpoint: number of result summaries
lentitles1 = []    # per endpoint: number of titles

for endpoint in endpoints[:2]:
    csw = CatalogueServiceWeb(endpoint, timeout=60)
    # Start fresh lists for every endpoint; reusing one list across endpoints
    # would make every row of the table point at the same accumulated data.
    records1 = []
    titles1 = []
    for v in variables:
        try:
            csw.getrecords(keywords=[v], maxrecords=60)
            summary = csw.results
            found_titles = [rec.title for rec in csw.records.values()]
        except Exception:
            # Best-effort discovery: one failing search must not abort the
            # whole survey.  Mark it in both lists and do not read
            # csw.records, which may still hold the previous search's hits.
            records1.append('Error')
            titles1.append('Error')
        else:
            records1.append(summary)
            titles1.extend(found_titles)
    records2.append(records1)
    titles2.append(titles1)
    lenrecords1.append(len(records1))
    lentitles1.append(len(titles1))

# One row per searched endpoint (zip stops at the shortest list).
zipvar1 = zip(endpoints, records2, lenrecords1, titles2, lentitles1)
df = DataFrame(data=zipvar1, columns=['endpoints', 'records1', 'lenrecords1', 'titles1', 'lentitles1'])
print(df)
In [5]:
def service_urls(records, service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """Collect one service URL per record for the given reference scheme.

    Parameters
    ----------
    records : mapping
        Mapping whose values expose a ``references`` attribute: an iterable
        of dicts carrying ``'scheme'`` and ``'url'`` keys.
    service_string : str
        Scheme identifier to match against each reference's ``'scheme'``.

    Returns
    -------
    list
        For every record that has a matching reference, the ``'url'`` of its
        first match, in the mapping's iteration order.  Records without a
        match contribute nothing.
    """
    urls = []
    # Only the values are needed; .values() also works on mappings that do
    # not provide the Python-2-only iteritems().
    for rec in records.values():
        # Take the first matching reference, or None when there is none.
        # .get() lets a reference without a 'scheme' key be skipped.
        url = next(
            (ref['url'] for ref in rec.references
             if ref.get('scheme') == service_string),
            None,
        )
        if url is not None:
            urls.append(url)
    return urls
# Establish date-range filter definitions
def dateRange(start_date='1900-01-01', stop_date='2100-01-01', constraint='overlaps'):
    """Build a pair of ``fes`` filters restricting records to a date range.

    Parameters
    ----------
    start_date, stop_date : str
        Bounds of the range, passed through as the filter literals.
    constraint : {'overlaps', 'within'}
        'overlaps': ``startDate`` <= stop_date and ``endDate`` >= start_date.
        'within':   ``startDate`` >= start_date and ``endDate`` <= stop_date.

    Returns
    -------
    tuple
        ``(start, stop)`` -- the filter on ``startDate`` and the filter on
        ``endDate``.

    Raises
    ------
    ValueError
        If ``constraint`` is neither 'overlaps' nor 'within'.
    """
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='startDate', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='endDate', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='startDate', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='endDate', literal=stop_date)
    else:
        # Without this branch an unknown constraint reached the return with
        # start/stop unassigned and failed with an UnboundLocalError.
        raise ValueError(
            "constraint must be 'overlaps' or 'within', got %r" % (constraint,))
    return start, stop
In [6]:
# For each endpoint, search on every term in variables1 and gather the WMS,
# DAP and SOS service URLs found in the matching records.
variables1 = ['wind', 'temp']
n_wms_urls = []   # per endpoint: number of distinct WMS URLs
n_dap_urls = []   # per endpoint: number of distinct DAP URLs
n_sos_urls = []   # per endpoint: number of distinct SOS URLs
wms_urls1 = []    # per endpoint: list of WMS URLs
dap_urls1 = []    # per endpoint: list of DAP URLs
sos_urls1 = []    # per endpoint: list of SOS URLs

# NOTE(review): the filters below (and `keywords` in the loop) are built but
# not passed to any search in this cell; they are kept in case later cells
# rely on them.
bbox = fes.BBox([180, 90, -180, -90])
start, stop = dateRange('1970-01-01', '2014-02-01')
serviceType = fes.PropertyIsLike(propertyname='apiso:ServiceType', literal='*opendap*')

for endpoint in endpoints[:2]:
    csw = CatalogueServiceWeb(endpoint, timeout=60)
    # Accumulate across all search terms so that an endpoint's row reflects
    # every variable, not just the last one searched.
    wms_urls = []
    dap_urls = []
    sos_urls = []
    for v in variables1:
        # Search on the term actually being iterated (variables1), not on
        # the same-index entry of the unrelated `variables` list.
        std_name = v
        sos_name = v
        keywords = fes.PropertyIsLike(propertyname='anyText', literal=std_name)
        try:
            csw.getrecords(keywords=[v], maxrecords=60)
            found_sos = service_urls(csw.records, service_string='urn:x-esri:specification:ServiceType:sos:url')
            found_dap = service_urls(csw.records, service_string='urn:x-esri:specification:ServiceType:odp:url')
            found_wms = service_urls(csw.records, service_string='urn:x-esri:specification:ServiceType:wms:url')
        except Exception:
            # Best-effort: a failed search simply contributes no URLs.
            continue
        # A record can match several terms; keep each URL once, in the
        # order it was first seen.
        for bucket, found in ((sos_urls, found_sos),
                              (dap_urls, found_dap),
                              (wms_urls, found_wms)):
            for url in found:
                if url not in bucket:
                    bucket.append(url)
    wms_urls1.append(wms_urls)
    dap_urls1.append(dap_urls)
    sos_urls1.append(sos_urls)
    n_wms_urls.append(len(wms_urls))
    n_dap_urls.append(len(dap_urls))
    n_sos_urls.append(len(sos_urls))

print(wms_urls)
# One row per searched endpoint (zip stops at the shortest list).  The n_*
# count lists are filled above but not yet added as columns.
zipvar1 = zip(endpoints, wms_urls1, dap_urls1, sos_urls1)
df = DataFrame(data=zipvar1, columns=['endpoints', 'wms_urls', 'dap_urls', 'sos_urls'])
print(df)
In [7]:
# Report, for each searched endpoint, how many service URLs of each kind were
# collected.  The counts come from the per-endpoint lists (wms_urls1, ...)
# rather than from the leftover loop variables, which only describe the last
# search that ran.  No new CSW connection is needed just to print counts.
for endpoint, wms_found, dap_found, sos_found in zip(
        endpoints, wms_urls1, dap_urls1, sos_urls1):
    print('Endpoint: ' + endpoint)
    # A failed search may have been recorded as the placeholder string
    # 'None'; count that as zero URLs instead of its four characters.
    print('Number of WMS: ' + str(0 if wms_found == 'None' else len(wms_found)))
    print('Number of DAP: ' + str(0 if dap_found == 'None' else len(dap_found)))
    print('Number of SOS: ' + str(0 if sos_found == 'None' else len(sos_found)))
    print('')
In [ ]:
In [ ]: