Check an OPeNDAP catalog


In [1]:
import pathlib
import xml.etree.ElementTree
import lxml.etree
import datetime
import requests
import tqdm

In [2]:
# Location of the catalog list whose catalogRef URLs are checked below.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
catalog = pathlib.Path(
    '/Users/baart_f/src/thredds-docker/catalogList.xml'
)

In [3]:
# Parse the catalog into an lxml element tree, dropping whitespace-only text
# between elements. lxml's pretty_print (requested when the tree is written
# out further down) only re-indents where no such whitespace text exists;
# without this, comments inserted into the tree stay glued to their
# neighbouring tags in the written file.
parser = lxml.etree.XMLParser(remove_blank_text=True)
tree = lxml.etree.parse(str(catalog), parser)

In [6]:
# Collect every catalogRef element (THREDDS InvCatalog v1.0 namespace) below
# the document root and report how many were found.
refs = tree.getroot().findall(
    './/{http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0}catalogRef'
)
print(len(refs))


81

In [7]:
# Check every catalogRef URL and annotate the tree with the outcome.
#
# * Reachable refs (HTTP 200) get an "ok" comment inserted in front of them.
# * Unreachable refs get a "not ok" comment and are themselves replaced by a
#   comment holding their serialised XML, so they are disabled in the written
#   catalog but can still be restored by hand.
#
# NOTE: this cell mutates `tree` in place (replaced refs are detached from
# their parent), so re-run the parse and findall cells before running it again.
HREF_ATTR = '{http://www.w3.org/1999/xlink}href'
REQUEST_TIMEOUT = 5  # seconds to wait for each catalog before giving up

for ref in tqdm.tqdm_notebook(refs):
    url = ref.attrib[HREF_ATTR]
    msg = ''
    parent = ref.getparent()

    try:
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
        if resp.status_code != 200:
            msg = "result not ok (status %s)" % (resp.status_code, )
    except requests.exceptions.Timeout:
        msg = "result not ok (timeout)"
    except requests.exceptions.ConnectionError:
        msg = "result not ok (connection error)"
    except requests.exceptions.RequestException as error:
        # Any other request failure (invalid URL, too many redirects, ...)
        # marks this ref as failed instead of aborting the whole check.
        msg = "result not ok (%s)" % (type(error).__name__, )

    if msg:
        status = lxml.etree.Comment(
            'Url checked: not ok: %s (%s)' % (msg, datetime.datetime.now(), )
        )
        ref.addprevious(status)

        # Serialise without the tail so the whitespace following the element
        # stays outside the comment; as text, so Comment() receives a str.
        ref_string = lxml.etree.tostring(ref, encoding='unicode', with_tail=False)
        # XML comments may not contain '--' (lxml raises ValueError); break up
        # any such run so a ref with a double dash cannot crash the loop.
        while '--' in ref_string:
            ref_string = ref_string.replace('--', '- -')
        disabled = lxml.etree.Comment(ref_string)

        # replace() leaves the old tail on the detached element, so carry it
        # over to keep the surrounding layout intact.
        tail = ref.tail
        parent.replace(ref, disabled)
        disabled.tail = tail
    else:
        status = lxml.etree.Comment('Url checked: ok (%s)' % (datetime.datetime.now(), ))
        ref.addprevious(status)




In [8]:
# Serialise the annotated tree to catalog.xml in the working directory, as
# UTF-8 with an XML declaration and pretty-printing requested.
tree.write(
    'catalog.xml',
    encoding='UTF-8',
    xml_declaration=True,
    pretty_print=True,
)

In [9]:
# Shell escape: run `open .` on the current directory (`open` is the macOS
# launcher command) - presumably to inspect the freshly written catalog.xml.
!open .

In [14]:
# Inspection cell: echoes the repr of the tree object.
tree


Out[14]:
<lxml.etree._ElementTree at 0x10465f588>

In [ ]: