Outline of Notebook Elements:

  • Theme Title
  • Questions
  • DISCOVERY Process (code and narrative)
  • ACCESS Process (code and narrative)
  • USE Process (code and narrative)
  • Results and Conclusions (narrative)

Theme: Extreme Event > Scenario B: Oil Tanker Spill

I. Question: Can we discover and access the core variables applicable to analysis of an Oil Tanker Spill from the available list of Registries?

figure out the different names for service urls - sos, wms, dap - and figure out how to loop those through so you don't leave off any.... df. function - figure that out and how to spit out tables for the assessment and how to graph that -- figure out some basic graphs to do for each extreme event


In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import csv
import re
import cStringIO
import urllib2
import parser
import pdb
from datetime import datetime
from pylab import *
from owslib.csw import CatalogueServiceWeb
from owslib.wms import WebMapService
from owslib.csw import CatalogueServiceWeb
from owslib.sos import SensorObservationService
from owslib.etree import etree
from owslib import fes
#from pyoos.utils.etree import etree
#from pyoos.parsers.ioos.one.timeseries import TimeSeries
#from pyoos.collectors.coops.coops_sos import CoopsSos
import netCDF4

endpoints = ['http://www.nodc.noaa.gov/geoportal/csw',
             'http://www.ngdc.noaa.gov/geoportal/csw',
             'http://catalog.data.gov/csw-all',
             'http://geoport.whoi.edu/geoportal/csw',
             'https://edg.epa.gov/metadata/csw',
             'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw',
             'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw',
             'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw']

variables = ['phytoplankton','zooplankton', 'fish', 'river', 'currents', 'bathymetry', 'wind']

A. DISCOVERY Process: Accessing Catalogs for variables required for oil spill modeling.


In [2]:
cswlist = []
oplist = []
isolist = []
lenoplist =[]
lenisolist=[]
for endpoint in endpoints[:2]:
    csw = CatalogueServiceWeb(endpoint,timeout=60)
    cswlist.append(csw.version)
    oplist.append([op.name for op in csw.operations])
    for oper in csw.operations:
        try:
            if oper.name == 'GetRecords':
                isolist.append(oper.constraints['SupportedISOQueryables']['values'])
        except KeyError:
            isolist.append('Keyerror')
    lenoplist.append(len(oplist[-1]))
    lenisolist.append(len(isolist[-1]))

In [3]:
CWSLIST = zip(endpoints,cswlist,oplist,lenoplist, isolist,lenisolist)
df = DataFrame(data = CWSLIST, columns = ['endpoint','CSW Version', 'Operations','#of Operations','ISO_Queryables','#isoqueryeables'])
print df


                                 endpoint CSW Version  \
0  http://www.nodc.noaa.gov/geoportal/csw       2.0.2   
1  http://www.ngdc.noaa.gov/geoportal/csw       2.0.2   

                                          Operations  #of Operations  \
0  [GetCapabilities, DescribeRecord, GetRecords, ...               5   
1  [GetCapabilities, DescribeRecord, GetRecords, ...               5   

                                      ISO_Queryables  #isoqueryeables  
0  [apiso:Subject, apiso:Title, apiso:Abstract, a...               37  
1  [apiso:Subject, apiso:Title, apiso:Abstract, a...               37  

[2 rows x 6 columns]

In [4]:
#IOOS Core Biological Variables include fish, phytoplankton, and zooplankton
#searching for these keywords in a particular registry provides titles of records that include a wide range of records.  
#This provides a first step in discovery, and refining data sets for a particular time period and geography will be the next step.
#Variable search terms:

#    phytoplankton
#    zooplankton
#    fish
#    river flow rates
#    sea surface currents #velocity vectors
#    bathymetry
#    bottom type
#    wind



variables = ['phytoplankton','zooplankton', 'fish', 'river', 'currents', 'bathymetry', 'wind']
variables1 = []
records1 = []
records2 = []
titles1 = []
titles2=[]
lenrecords1 = []
lentitles1 = []
k = 0
for endpoint in endpoints[:2]:    
    csw = CatalogueServiceWeb(endpoints[k],timeout=60)
    n = 0
    for v in variables:
        try:
            csw.getrecords(keywords = [variables[n]], maxrecords = 60)
            records1.append(csw.results)
        except Exception, ex1:
            records1.append('Error')
        try:
            for rec in csw.records:    
                titles1.append(csw.records[rec].title)
        except Exception, ex1:
            titles1.append('Error') 
        n +=1
    titles2.append(titles1)
    records2.append(records1)
    lentitles1.append(len(titles2[k]))
    lenrecords1.append(len(records2[k]))
    k +=1


    #records.append(records1)
    #titles.append(title)
    #records.append(csw.results)
    #for rec in csw.records:
zipvar1 = zip(endpoints, records2,lenrecords1, titles2,lentitles1)
df = DataFrame(data = zipvar1, columns = ['endpoints', 'records1','lenrecords1', 'titles1','lentitles1'])
print df


                                endpoints  \
0  http://www.nodc.noaa.gov/geoportal/csw   
1  http://www.ngdc.noaa.gov/geoportal/csw   

                                            records1  lenrecords1  \
0  [{u'matches': 37, u'nextrecord': 0, u'returned...            7   
1  [{u'matches': 37, u'nextrecord': 0, u'returned...           14   

                                             titles1  lentitles1  
0  [Fluorescence, Terra MODIS, OSU DB, 0.125 degr...         280  
1  [Fluorescence, Terra MODIS, OSU DB, 0.125 degr...         595  

[2 rows x 5 columns]
/home/local/python27_epd/lib/python2.7/site-packages/owslib/csw.py:189: UserWarning: Please use the updated 'getrecords2' method instead of 'getrecords'.  
        The 'getrecords' method will be upgraded to use the 'getrecords2' parameters
        in a future version of OWSLib.
  in a future version of OWSLib.""")

In [5]:
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    urls=[]
    for key,rec in records.iteritems():
        #create a generator object, and iterate through it until the match is found
        #if not found, gets the default value (here "none")
        url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

#Establish daterange definitions
def dateRange(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='startDate', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='endDate', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='startDate', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='endDate', literal=stop_date)
    return start,stop

In [6]:
variables1 = ['wind', 'temp']
n_wms_urls = []
n_dap_urls = []
n_sos_urls = []
wms_urls1= []
dap_urls1= []
sos_urls1= []
k = 0
for endpoint in endpoints[:2]:
    csw = CatalogueServiceWeb(endpoints[k],timeout=60)
    n = 0
    for v in variables1:
        try:
            csw.getrecords(keywords = [variables[n]], maxrecords = 60)
            sos_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:sos:url')
            dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:odp:url')
            wms_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:wms:url')
            bbox = fes.BBox([180, 90, -180, -90])
            start,stop = dateRange('1970-01-01','2014-02-01')
            std_name = variables[n]
            sos_name = variables[n]
            keywords = fes.PropertyIsLike(propertyname='anyText', literal=std_name)
            serviceType = fes.PropertyIsLike(propertyname='apiso:ServiceType', literal='*opendap*') 
        except Exception, ex1:
            sos_urls = 'None'
            dap_urls = 'None'
            wms_urls = 'None'
        n +=1       
    wms_urls1.append(wms_urls)
    dap_urls1.append(dap_urls)
    sos_urls1.append(sos_urls)
    k += 1
print wms_urls
zipvar1 = zip(endpoints, wms_urls1, dap_urls1, sos_urls1)
#df = DataFrame(data = zipvar1, columns = ['endpoints', 'wms_urls', 'n_wms_urls', 'dap_urls', 'n_dap_urls', 'sos_urls', 'n_sos_urls'])
df = DataFrame(data = zipvar1, columns = ['endpoints', 'wms_urls', 'dap_urls', 'sos_urls'])
print df


None
                                endpoints wms_urls dap_urls sos_urls
0  http://www.nodc.noaa.gov/geoportal/csw       []       []       []
1  http://www.ngdc.noaa.gov/geoportal/csw     None     None     None

[2 rows x 4 columns]

In [7]:
k = 0
for endpoint in endpoints[:2]:
    csw = CatalogueServiceWeb(endpoints[k],timeout=60)
    print 'Endpoint: ' + endpoint
    print 'Number of WMS: ' + str(len(wms_urls))
    print 'Number of DAP: ' + str(len(dap_urls))
    print 'Number of SOS: ' + str(len(sos_urls))
    print ''
    k +=1


Endpoint: http://www.nodc.noaa.gov/geoportal/csw
Number of WMS: 4
Number of DAP: 4
Number of SOS: 4

---------------------------------------------------------------------------
timeout                                   Traceback (most recent call last)
<ipython-input-7-052b62fbab55> in <module>()
      2 k = 0
      3 for endpoint in endpoints:
----> 4     csw = CatalogueServiceWeb(endpoints[k],timeout=60)
      5     print 'Endpoint: ' + endpoint
      6     print 'Number of WMS: ' + str(len(wms_urls))

/home/local/python27_epd/lib/python2.7/site-packages/owslib/csw.pyc in __init__(self, url, lang, version, timeout, skip_caps)
     71             self.request = '%s%s' % (bind_url(self.url), urlencode(data))
     72 
---> 73             self._invoke()
     74 
     75             if self.exceptionreport is None:

/home/local/python27_epd/lib/python2.7/site-packages/owslib/csw.pyc in _invoke(self)
    585 
    586         if isinstance(self.request, basestring):  # GET KVP
--> 587             self.response = urlopen(self.request, timeout=self.timeout).read()
    588         else:
    589             self.request = cleanup_namespaces(self.request)

/home/local/python27_epd/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout)
    124     if _opener is None:
    125         _opener = build_opener()
--> 126     return _opener.open(url, data, timeout)
    127 
    128 def install_opener(opener):

/home/local/python27_epd/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    398             req = meth(req)
    399 
--> 400         response = self._open(req, data)
    401 
    402         # post-process response

/home/local/python27_epd/lib/python2.7/urllib2.pyc in _open(self, req, data)
    416         protocol = req.get_type()
    417         result = self._call_chain(self.handle_open, protocol, protocol +
--> 418                                   '_open', req)
    419         if result:
    420             return result

/home/local/python27_epd/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    376             func = getattr(handler, meth_name)
    377 
--> 378             result = func(*args)
    379             if result is not None:
    380                 return result

/home/local/python27_epd/lib/python2.7/urllib2.pyc in http_open(self, req)
   1205 
   1206     def http_open(self, req):
-> 1207         return self.do_open(httplib.HTTPConnection, req)
   1208 
   1209     http_request = AbstractHTTPHandler.do_request_

/home/local/python27_epd/lib/python2.7/urllib2.pyc in do_open(self, http_class, req)
   1178         else:
   1179             try:
-> 1180                 r = h.getresponse(buffering=True)
   1181             except TypeError: # buffering kw not supported
   1182                 r = h.getresponse()

/home/local/python27_epd/lib/python2.7/httplib.pyc in getresponse(self, buffering)
   1028         response = self.response_class(*args, **kwds)
   1029 
-> 1030         response.begin()
   1031         assert response.will_close != _UNKNOWN
   1032         self.__state = _CS_IDLE

/home/local/python27_epd/lib/python2.7/httplib.pyc in begin(self)
    405         # read until we get a non-100 response
    406         while True:
--> 407             version, status, reason = self._read_status()
    408             if status != CONTINUE:
    409                 break

/home/local/python27_epd/lib/python2.7/httplib.pyc in _read_status(self)
    363     def _read_status(self):
    364         # Initialize with Simple-Response defaults
--> 365         line = self.fp.readline()
    366         if self.debuglevel > 0:
    367             print "reply:", repr(line)

/home/local/python27_epd/lib/python2.7/socket.pyc in readline(self, size)
    445             while True:
    446                 try:
--> 447                     data = self._sock.recv(self._rbufsize)
    448                 except error, e:
    449                     if e.args[0] == EINTR:

timeout: timed out

In [ ]:


In [ ]: