Exploring CSW access in Python using OWSLib with Data.gov


In [1]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import numpy as np

In [2]:
# Data.gov exposes two CSW endpoints; this notebook queries the granule-level
# one. Swap in the commented-out URL to search collections instead.
endpoint = 'http://catalog.data.gov/csw-all'  # granules
# endpoint = 'http://catalog.data.gov/csw'  # collections

# Connect with a 60 s timeout.
csw = CatalogueServiceWeb(endpoint, timeout=60)

# Last expression of the cell: show the CSW version reported by the client.
csw.version


Out[2]:
'2.0.2'

In [3]:
# Names of the operations listed on the connected CSW client.
[operation.name for operation in csw.operations]


Out[3]:
['GetCapabilities',
 'GetRepositoryItem',
 'DescribeRecord',
 'GetDomain',
 'GetRecordById',
 'GetRecords']

In [4]:
# Look up the GetRecords operation, then display the constraints
# (queryable property names) attached to it.
getrecords_operation = csw.get_operation_by_name('GetRecords')
getrecords_operation.constraints


Out[4]:
[Constraint: SupportedISOQueryables - ['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Lineage', 'apiso:Classification', 'apiso:Creator', 'apiso:Relation', 'apiso:OtherConstraints', 'apiso:SpecificationTitle', 'apiso:ResponsiblePartyRole', 'apiso:SpecificationDateType', 'apiso:Degree', 'apiso:Contributor', 'apiso:ConditionApplyingToAccessAndUse', 'apiso:SpecificationDate', 'apiso:AccessConstraints', 'apiso:Publisher'],
 Constraint: SupportedDublinCoreQueryables - ['dc:contributor', 'dc:source', 'dc:language', 'dc:title', 'dc:subject', 'dc:creator', 'dc:type', 'ows:BoundingBox', 'dct:modified', 'dct:abstract', 'dc:relation', 'dc:date', 'dc:identifier', 'dc:publisher', 'dc:format', 'csw:AnyText', 'dc:rights']]

In [5]:
# Best-effort probe of the GetDomain operation: look the operation up, request
# the domain of the apiso:ServiceType property, and print the returned values.
try:
    # Presumably raises when the server does not advertise GetDomain -- confirm
    # against the OWSLib documentation.
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception as err:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt/SystemExit still propagate, and report the underlying
    # reason so a network failure is not mistaken for a missing operation.
    print('GetDomain not supported: %s' % err)


['ArcGIS REST API for 10', 'urn:ogc:serviceType:WebMapService']

In [6]:
# Free-text search term, matched anywhere in a record's text.
val = 'salinity'

# "Like" filter on apiso:AnyText; the term is wrapped in the '*' wildcard on
# both sides so it matches as a substring.
filter1 = fes.PropertyIsLike(
    propertyname='apiso:AnyText',
    literal='*{}*'.format(val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)

# getrecords2 is given its constraints as a list of filters.
filter_list = [filter1]

In [7]:
# Run the filtered query asking for full records (up to 1000 requested),
# then show how many records the client is holding.
csw.getrecords2(
    constraints=filter_list,
    maxrecords=1000,
    esn='full',
)
len(csw.records)


Out[7]:
10

In [8]:
# Print the title of every record returned by the last query.
# csw.records maps identifier -> record; only the record is needed here, so
# iterate the values. `.values()` and `print(...)` behave the same on
# Python 2 and Python 3, unlike `.iteritems()` and the print statement.
for item in csw.records.values():
    print(item.title)


OC375L01: WHOI cruise 375 leg 01 aboard the R/V Oceanus from 2002-03-15 - 2002-04-01 (NODC Accession 0055289)
WATER DEPTH and Other Data from KNORR from 19710505 to 19710510 (NODC Accession 7100695)
Physical and chemical data collected using bottle and BTs casts in the TOGA Area of Atlantic Ocean from NOAA Ship RESEARCHER and other platforms from 25 June 1974 to 16 August 1974 (NODC Accession 7700649)
WATER DEPTH and Other Data from DERWENT HUNTER from 19590204 to 19591208 (NODC Accession 7601077)
Application of the Sea-Level Affecting Marshes Model (SLAMM 5.1) to Eastern Neck NWR
Temperature and salinity profile data from globally distributed Argo profiling floats for the week of 2007-07-08 for the Global Argo Data Repository, date ranged from 2002-03-30 to 2007-07-14 (NODC Accession 0029182)
Real-time current, wave, temperature, salinity, and meteorological data from Gulf of Maine Ocean Observing System (GoMOOS) buoys, 3/6/2005 - 3/12/2005 (NODC Accession 0002075)
Wave spectra, meteorological, and other data from the FERREL from 07 September 1982 to 06 November 1982 (NODC Accession 8500005)
Collections of data to assess the water quality of the Enchanted Lakes coastal watershed, Kailua, Oahu, Hawaii, November 2003 - April 2004 (NODC Accession 0002486)
Climate Forecast System Reanalysis (CFSR), for 1979 to 2011

In [12]:
# Pick one of the returned records at random (unseeded, so the selection
# differs between runs), print its title and display its references.
choice = np.random.choice(list(csw.records.keys()))
chosen_record = csw.records[choice]
# print(...) with a single argument works on both Python 2 and Python 3.
print(chosen_record.title)
chosen_record.references


Climate Forecast System Reanalysis (CFSR), for 1979 to 2011
Out[12]:
[{'scheme': 'WMS', 'url': 'http://nomads.ncdc.noaa.gov/thredds/cfsr.html'},
 {'scheme': 'FTP', 'url': 'ftp://nomads.ncdc.noaa.gov/CFSR/'},
 {'scheme': 'HTTP', 'url': 'http://nomads.ncdc.noaa.gov/data/'},
 {'scheme': 'Pressure', 'url': 'http://nomads.ncdc.noaa.gov/modeldata/'}]

In [ ]:
# Page through every record matching filter_list and print each title.
page_size = 20      # records requested per GetRecords call
startposition = 0

while True:
    csw.getrecords2(constraints=filter_list, startposition=startposition,
                    maxrecords=page_size, esn='full')
    # csw.records is keyed by identifier, so iterating it directly yields the
    # key strings (and `.title` would be str.title); iterate the values to get
    # the record objects.
    for record in csw.records.values():
        print(record.title)
    next_record = csw.results['nextrecord']
    # Stop when the server reports no further records (nextrecord == 0), when
    # the reported position fails to advance, or when a page comes back empty.
    # The last two checks keep the loop from spinning forever.
    if (next_record == 0 or next_record <= startposition
            or csw.results['returned'] == 0):
        break
    # Continue from the position the server reports instead of a fixed step
    # that differs from the page size, so consecutive pages do not overlap.
    startposition = next_record

In [ ]:
# Page through the whole catalogue (no constraints), printing the identifier
# of every record.
limit = 1000        # records requested per GetRecords call

startposition = 0

while True:
    csw.getrecords2(startposition=startposition, maxrecords=limit)
    # Iterating csw.records yields the record identifiers (its keys).
    for identifier in csw.records:
        print(identifier)
    next_record = csw.results['nextrecord']
    # Stop when the server reports no further records (nextrecord == 0), when
    # the reported position fails to advance, or when a page comes back empty.
    if (next_record == 0 or next_record <= startposition
            or csw.results['returned'] == 0):
        break
    # Advance to the position reported by the server; stepping by a fixed 10
    # while requesting `limit` records re-fetched almost the entire previous
    # page on every iteration.
    startposition = next_record

In [ ]:
csw.getrecords2(