Use CSW to find model data at NODC, NGDC, DATA.GOV and PACIOOS


In [34]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import netCDF4
import datetime as dt

In [35]:
# hopefully something like this will be implemented in fes soon
def dateRange(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    return start,stop

In [36]:
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """
    Get all URLs matching a specific ServiceType 
 
    Unfortunately these seem to differ between different CSW-ISO services.
    For example, OpenDAP is specified:
    NODC geoportal: 'urn:x-esri:specification:ServiceType:OPeNDAP'
    NGDC geoportal: 'urn:x-esri:specification:ServiceType:odp:url'
    """

    urls=[]
    for key,rec in records.iteritems():
        #create a generator object, and iterate through it until the match is found
        #if not found, gets the default value (here "none")
        url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

In [36]:


In [37]:
# trying to do this search:
# ('roms' OR 'selfe' OR 'adcirc' OR 'ncom' OR 'hycom' OR 'fvcom') AND 'ocean' NOT 'regridded' NOT 'espresso'
# should return 11 records from NODC geoportal

model_name_list = ['roms','selfe','adcirc','ncom','hycom','fvcom']

#box=[-74.4751, 40.3890, -73.7432, 40.9397]
box=[-76.4751, 38.3890, -71.7432, 42.9397]

#box=[-180, -90, 180, 90]

# specific specific times (UTC) ...

In [38]:
# ... or relative to now
jd_start = dt.datetime.utcnow()- dt.timedelta(days=3)
jd_stop = dt.datetime.utcnow() + dt.timedelta(days=3)

In [39]:
# 2014 feb 10-15 storm
jd_start = dt.datetime(2014,2,10)
jd_stop = dt.datetime(2014,2,15)

In [40]:
# 2014 recent
jd_start = dt.datetime(2014,3,8)
jd_stop = dt.datetime(2014,3,11)

# 2011 
#jd_start = dt.datetime(2013,4,20)
#jd_stop = dt.datetime(2013,4,24)

In [41]:
# hurricane sandy
jd_start = dt.datetime(2012,10,26)
jd_stop = dt.datetime(2012,11,2)

In [42]:
start_date = jd_start.strftime('%Y-%m-%d %H:00')
stop_date  = jd_stop.strftime('%Y-%m-%d %H:00')

jd_start = dt.datetime.strptime(start_date,'%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date,'%Y-%m-%d %H:%M')

print start_date,'to',stop_date

sos_name = 'water_surface_height_above_reference_datum'


2012-10-26 00:00 to 2012-11-02 00:00

In [43]:
# convert User Input into FES filters
start,stop = dateRange(start_date,stop_date)
bbox = fes.BBox(box)

or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                    escapeChar='\\',wildCard='*',singleChar='?') for val in model_name_list])

val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                        escapeChar='\\',wildCard='*',singleChar='?')])

filter_list = [fes.And([ bbox, start, stop, or_filt, not_filt]) ]

Find model results at NODC


In [44]:
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'   # NODC/UAF Geoportal: granule level
endpoint = 'https://gis.ncdc.noaa.gov/geoportal/csw202/discovery'
csw = CatalogueServiceWeb(endpoint,timeout=60)
print csw.version


2.0.2

In [45]:
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print '\nISO Queryables:\n',oper.constraints['SupportedISOQueryables']['values']


ISO Queryables:
['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType']

In [46]:
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())


Out[46]:
6

In [47]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references


ncom.ncom_reg1_agg.NCOM_Region_1_Aggregation_best.ncd
Out[47]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.navo.navy.mil/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7B99CB524F-EA88-49F7-8C4C-7BB5077E052A%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:OPeNDAP',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'},
 {'scheme': 'urn:x-esri:specification:ServiceType:LAS',
  'url': 'http://ferret.pmel.noaa.gov/geoideLAS/getUI.do?data_url=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WMS',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/wms/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd?service=WMS&version=1.3.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WCS',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/wcs/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd?service=WCS&version=1.0.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ERDDAP',
  'url': 'http://upwell.pfeg.noaa.gov/erddap/search/index.html?searchFor=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WCT',
  'url': 'http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'}]

In [50]:
print csw.records[choice].xml


<csw:Record xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcmiBox="http://dublincore.org/documents/2000/07/11/dcmi-box/" xmlns:dct="http://purl.org/dc/terms/" xmlns:gml="http://www.opengis.net/gml" xmlns:ows="http://www.opengis.net/ows" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:FileID">ncom.ncom_reg1_agg.NCOM_Region_1_Aggregation_best.ncd</dc:identifier>
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:FileID">ncom.ncom_reg1_agg.NCOM_Region_1_Aggregation_best.ncd</dc:identifier>
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:FileID">GRID</dc:identifier>
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID">{99CB524F-EA88-49F7-8C4C-7BB5077E052A}</dc:identifier>
<dc:title>NCOM Region 1 Aggregation/Best Time Series</dc:title>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">downloadableData</dc:type>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">liveData</dc:type>
<dc:subject>USA/NOAA/NESDIS/NCDDC</dc:subject>
<dc:subject>forecast_period</dc:subject>
<dc:subject>sea_water_temperature</dc:subject>
<dc:subject>sea_water_salinity</dc:subject>
<dc:subject>eastward_sea_water_velocity</dc:subject>
<dc:subject>northward_sea_water_velocity</dc:subject>
<dc:subject>latitude</dc:subject>
<dc:subject>longitude</dc:subject>
<dc:subject>time</dc:subject>
<dc:subject>forecast_reference_time</dc:subject>
<dc:subject>Unified Access Framework (UAF)</dc:subject>
<dc:subject>The Unified Access Framework: A GEO-IDE project to integrate scientific data management and access</dc:subject>
<dc:subject>climatologyMeteorologyAtmosphere</dc:subject>
<dct:modified>2014-04-02T03:37:46+00:00</dct:modified>
<dct:abstract>Best time series, taking the data from the most recent run available.</dct:abstract>
<dct:abstract>Best time series, taking the data from the most recent run available.</dct:abstract>
<dct:abstract>Best time series, taking the data from the most recent run available.</dct:abstract>
<dct:references scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink">http://www.navo.navy.mil/</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document">http://www.nodc.noaa.gov/geoportal/csw?getxml=%7B99CB524F-EA88-49F7-8C4C-7BB5077E052A%7D</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:OPeNDAP">http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:LAS">http://ferret.pmel.noaa.gov/geoideLAS/getUI.do?data_url=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:WMS">http://ecowatch.ncddc.noaa.gov/thredds/wms/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd?service=WMS&amp;version=1.3.0&amp;request=GetCapabilities</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:WCS">http://ecowatch.ncddc.noaa.gov/thredds/wcs/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd?service=WCS&amp;version=1.0.0&amp;request=GetCapabilities</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:ERDDAP">http://upwell.pfeg.noaa.gov/erddap/search/index.html?searchFor=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:WCT">http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd</dct:references>
<ows:WGS84BoundingBox>
<ows:LowerCorner>-100.0 0.0</ows:LowerCorner>
<ows:UpperCorner>-50.0 70.0</ows:UpperCorner>
</ows:WGS84BoundingBox>
<ows:BoundingBox>
<ows:LowerCorner>-100.0 0.0</ows:LowerCorner>
<ows:UpperCorner>-50.0 70.0</ows:UpperCorner>
</ows:BoundingBox>
<dct:date scheme="urn:x-esri:specification:startdate">2000-01-01Z</dct:date>
<dct:date scheme="urn:x-esri:specification:enddate">2013-03-01Z</dct:date>
<dc:source>{F88062CF-8262-477C-8A7E-33DDC9E124EB}</dc:source>
</csw:Record>


In [48]:
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')
len(dap_urls)


Out[48]:
6

Find model results at NGDC


In [16]:
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' #  NGDC/IOOS Geoportal
csw = CatalogueServiceWeb(endpoint,timeout=60)
csw.version


Out[16]:
'2.0.2'

In [17]:
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print '\nISO Queryables:\n',oper.constraints['SupportedISOQueryables']['values']


ISO Queryables:
['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType']

In [18]:
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())


Out[18]:
5

In [19]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references


hycom_global
Out[19]:
[{'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://oos.soest.hawaii.edu/thredds/idd/ocn_mod.html?dataset=hycom_global'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://pacioos.org/voyager/index.html?b=-85.200475%2C-180%2C85.200475%2C180&o=ofore:5:f:d1'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://oos.soest.hawaii.edu/las/getUI.do?dsid=hycom_global&varid=temperature-hycom_global&auto=true'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://oos.soest.hawaii.edu/erddap/griddap/HYCOM_Global_3D.graph'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://oos.soest.hawaii.edu/erddap/griddap/HYCOM_Global_2D.graph'},
 {'scheme': 'urn:x-esri:specification:ServiceType:wms:url',
  'url': 'http://oos.soest.hawaii.edu/thredds/wms/pacioos/hycom/global?service=WMS&version=1.3.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:wcs:url',
  'url': 'http://oos.soest.hawaii.edu/thredds/wcs/pacioos/hycom/global?service=WCS&version=1.0.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:odp:url',
  'url': 'http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global'}]

In [20]:
#dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:odp:url')
len(dap_urls)


Out[20]:
3

Find model data at CATALOG.DATA.GOV


In [21]:
endpoint = 'http://catalog.data.gov/csw-all' #  catalog.data.gov CSW
csw = CatalogueServiceWeb(endpoint,timeout=60)
csw.version


Out[21]:
'2.0.2'

In [22]:
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print '\nISO Queryables:\n',oper.constraints['SupportedISOQueryables']['values']


ISO Queryables:
['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType']

In [23]:
filter_list = [fes.And([ bbox, start, stop, or_filt, not_filt]) ]
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())


Out[23]:
0

In [24]:
filter_list = [fes.And([ bbox, or_filt, not_filt]) ]
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())


Out[24]:
67

In [25]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references


gov.noaa.ngdc.mgg.geophysics:G01149
Out[25]:
[{'scheme': 'None', 'url': 'http://www.ngdc.noaa.gov/seg/potfld/aromag.html'},
 {'scheme': 'None',
  'url': 'http://www.ngdc.noaa.gov/seg/potfld/amag_grd.shtml'}]

From the above, we can see that because the 'scheme' is 'None' on all the references, we can't extract the different service types, like OPeNDAP, WCS, etc.


In [26]:
#dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:odp:url')  #NGDC
#dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')  #NODC
dap_urls = service_urls(csw.records,service_string='?????????')    #CATALOG.DATA.GOV
len(dap_urls)


Out[26]:
0

Search at PACIOOS


In [27]:
endpoint = 'http://oos.soest.hawaii.edu/cgi-bin/csw.py' #  catalog.data.gov CSW
csw = CatalogueServiceWeb(endpoint,timeout=60)
print csw.version


2.0.2

In [28]:
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print '\nISO Queryables:\n',oper.constraints['SupportedISOQueryables']['values']


ISO Queryables:
['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType']

In [29]:
filter_list = [fes.And([ bbox, start, stop, or_filt, not_filt]) ]
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())


Out[29]:
0

In [30]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-6361c64444f8> in <module>()
----> 1 choice=random.choice(list(csw.records.keys()))
      2 print choice
      3 csw.records[choice].references

/home/local/python27_epd/lib/python2.7/site-packages/numpy/random/mtrand.so in mtrand.RandomState.choice (numpy/random/mtrand/mtrand.c:7299)()

ValueError: a must be non-empty

In [31]:
filter_list = [fes.And([ bbox,  or_filt]) ]
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())


Out[31]:
1

In [33]:
for rec,item in csw.records.iteritems():
    print item.title


HYbrid Coordinate Ocean Model (HYCOM): Global

In [32]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references


hycom_global
Out[32]:
[]

In [ ]: