Use CSW to find data at NODC and DATA.GOV


In [1]:
import numpy as np

In [2]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes

In [3]:
# Search: "sea_water_temperature" AND "NODC" AND NOT "TAO".
# (The original note recorded 24 matching datasets on Data.gov; the live
# count depends on the current state of the catalogue.)


def any_text_like(term):
    """Build a filter that matches records whose full text contains ``term``.

    The term is wrapped in ``*`` wildcards and compared against the
    ``apiso:AnyText`` queryable. Within the literal, ``\\`` is the escape
    character, ``*`` the multi-character wildcard and ``?`` the
    single-character wildcard.

    Parameters
    ----------
    term : str
        Text to look for; may contain the escape/wildcard characters above.

    Returns
    -------
    fes.PropertyIsLike
        The filter object, ready to be combined with ``fes.And`` / ``fes.Not``.
    """
    return fes.PropertyIsLike(propertyname='apiso:AnyText',
                              literal='*%s*' % term,
                              escapeChar='\\', wildCard='*', singleChar='?')


# Raw string: each backslash is passed through verbatim as the filter's
# escape character in front of an underscore (presumably so the server does
# not treat "_" as a wildcard -- TODO confirm). A plain '\_' produces the same
# bytes but is an invalid escape sequence that newer Pythons warn about.
filter1 = any_text_like(r'sea\_water\_temperature')
filter2 = any_text_like('NODC')
filter3 = fes.Not([any_text_like('TAO')])

# getrecords2 expects a list of constraints; a single And() joins all three.
filter_list = [fes.And([filter1, filter2, filter3])]

Find results at NODC


In [4]:
# NODC/UAF Geoportal CSW endpoint (granule-level records).
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'

# Connect with a 60 second timeout.
csw = CatalogueServiceWeb(endpoint, timeout=60)

# Function-call form of print: same output as the Python 2 statement for a
# single argument, and also valid syntax on Python 3.
print(csw.version)


2.0.2

In [5]:
# Show the constraints the service advertises for its GetRecords operation,
# i.e. the queryable property names a filter may reference.
getrecords_op = csw.get_operation_by_name('GetRecords')
getrecords_op.constraints


Out[5]:
[Constraint: SupportedCommonQueryables - ['Subject', 'Title', 'Abstract', 'AnyText', 'Format', 'Identifier', 'Modified', 'Type', 'BoundingBox'],
 Constraint: SupportedISOQueryables - ['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Degree', 'apiso:AccessConstraints', 'apiso:OtherConstraints', 'apiso:Classification', 'apiso:ConditionApplyingToAccessAndUse', 'apiso:Lineage', 'apiso:ResponsiblePartyRole', 'apiso:SpecificationTitle', 'apiso:SpecificationDate', 'apiso:SpecificationDateType']]

In [6]:
# Run the combined filter against the catalogue, requesting the 'full'
# element set and at most 1000 records.
csw.getrecords2(
    constraints=filter_list,
    maxrecords=1000,
    esn='full',
)

# Number of records that came back.
# NOTE(review): the saved output for this cell is 1000, exactly maxrecords,
# so the match set was probably truncated -- confirm against csw.results.
len(csw.records)


Out[6]:
1000

In [7]:
# Pick one record identifier at random (unseeded, so the pick changes from
# run to run) and show the access links attached to that record.
choice = np.random.choice(list(csw.records))

# Function-call form of print: same output as the Python 2 statement for a
# single argument, and also valid syntax on Python 3.
print(choice)

csw.records[choice].references


VIIRS_NPP-NAVO-L2P-v1.0.20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc
Out[7]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.usno.navy.mil/NAVO'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.nodc.noaa.gov/SatelliteData/ghrsst/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7BB727D83C-0A78-4382-897A-B027BAAC00BB%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:TDS',
  'url': 'http://data.nodc.noaa.gov/thredds/catalog/ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/catalog.html?dataset=ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc'},
 {'scheme': 'urn:x-esri:specification:ServiceType:FTP',
  'url': 'ftp://ftp.nodc.noaa.gov/pub/data.nodc/ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc'},
 {'scheme': 'urn:x-esri:specification:ServiceType:OPeNDAP',
  'url': 'http://data.nodc.noaa.gov/opendap/ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:Download',
  'url': 'http://data.nodc.noaa.gov/ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WMS',
  'url': 'http://data.nodc.noaa.gov/thredds/wms/ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc?service=WMS&version=1.3.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WCS',
  'url': 'http://data.nodc.noaa.gov/thredds/wcs/ghrsst/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/057/20140226163706-NAVO-L2P_GHRSST-SST1m-VIIRS_NPP-v02.0-fv01.0.nc?service=WCS&version=1.0.0&request=GetCapabilities'}]

Find model data at CATALOG.DATA.GOV


In [8]:
# Data.gov catalogue CSW endpoint (CSW for granules).
endpoint = 'http://catalog.data.gov/csw-all'

# Re-point `csw` at Data.gov, again with a 60 second timeout; the cells
# below query this client instead of the NODC one.
csw = CatalogueServiceWeb(endpoint, timeout=60)

csw.version


Out[8]:
'2.0.2'

In [9]:
# Show the constraints the service advertises for its GetRecords operation,
# i.e. the queryable property names a filter may reference.
getrecords_op = csw.get_operation_by_name('GetRecords')
getrecords_op.constraints


Out[9]:
[Constraint: SupportedISOQueryables - ['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Lineage', 'apiso:Classification', 'apiso:Creator', 'apiso:Relation', 'apiso:OtherConstraints', 'apiso:SpecificationTitle', 'apiso:ResponsiblePartyRole', 'apiso:SpecificationDateType', 'apiso:Degree', 'apiso:Contributor', 'apiso:ConditionApplyingToAccessAndUse', 'apiso:SpecificationDate', 'apiso:AccessConstraints', 'apiso:Publisher'],
 Constraint: SupportedDublinCoreQueryables - ['dc:contributor', 'dc:source', 'dc:language', 'dc:title', 'dc:subject', 'dc:creator', 'dc:type', 'ows:BoundingBox', 'dct:modified', 'dct:abstract', 'dc:relation', 'dc:date', 'dc:identifier', 'dc:publisher', 'dc:format', 'csw:AnyText', 'dc:rights']]

In [10]:
# Run the same combined filter against this catalogue, requesting the 'full'
# element set and at most 1000 records.
csw.getrecords2(
    constraints=filter_list,
    maxrecords=1000,
    esn='full',
)

# Number of records that came back.
len(csw.records)


Out[10]:
10

In [11]:
# Pick one record identifier at random (unseeded, so the pick changes from
# run to run) and show the access links attached to that record.
choice = np.random.choice(list(csw.records))

# Function-call form of print: same output as the Python 2 statement for a
# single argument, and also valid syntax on Python 3.
print(choice)

csw.records[choice].references


gov.noaa.nodc:0129829
Out[11]:
[{'scheme': 'HTTP', 'url': 'http://accession.nodc.noaa.gov/0129829'},
 {'scheme': 'HTTP', 'url': 'http://accession.nodc.noaa.gov/oas/129829'},
 {'scheme': 'HTTP', 'url': 'http://accession.nodc.noaa.gov/download/129829'},
 {'scheme': 'FTP',
  'url': 'ftp://ftp.nodc.noaa.gov/nodc/archive/arc0077/0129829/'}]

In [ ]:


In [ ]: