In [1]:
"""
Auxiliary functions for xml_to_yaml_converter.py

isAllWhiteSpace checks to see if there is really text
convertXml2Yaml creates a dirty dictionary of the yaml file
create_yaml_file modifies the dirty dictionary to fit the format of TopoFlow parameter files

perignon@colorado.edu
June 8, 2015
"""

import yaml
import string, re
from xml.dom import minidom
from xml.dom import Node
import numpy as np
from collections import OrderedDict

class UnsortableList(list):
    """A ``list`` whose ``sort`` does nothing, so its element order never changes."""

    def sort(self, *args, **kwargs):
        """Accept any arguments, ignore them, and leave the list untouched."""
        return None
    
class UnsortableOrderedDict(OrderedDict):
    """An ``OrderedDict`` whose ``items()`` result is wrapped in an ``UnsortableList``."""

    def items(self, *args, **kwargs):
        """Return whatever ``OrderedDict.items`` yields, as an ``UnsortableList``."""
        pairs = OrderedDict.items(self, *args, **kwargs)
        return UnsortableList(pairs)
    
# Register UnsortableOrderedDict with yaml, using SafeRepresenter.represent_dict
# as its representer.
# NOTE(review): presumably this keeps the insertion order of these dicts in the
# dumped file (their items() returns a list whose sort() does nothing) --
# confirm against the installed PyYAML version.
yaml.add_representer(UnsortableOrderedDict, yaml.representer.SafeRepresenter.represent_dict)


# Matches any single non-whitespace character.  Written as a raw string so that
# the backslash is passed to the regex engine instead of being read as an
# (invalid) string escape.
NonWhiteSpacePattern = re.compile(r'\S')
def isAllWhiteSpace(text):
    """Tell whether ``text`` contains no non-whitespace character.

    Parameters
    ----------
    text : str
        The string to inspect.

    Returns
    -------
    bool
        ``True`` when ``text`` is empty or made only of whitespace, ``False``
        otherwise.  (``True == 1`` and ``False == 0``, so code that compared
        the former integer result keeps working.)
    """
    return NonWhiteSpacePattern.search(text) is None


def convertXml2Yaml(obj, objL):
    """Walk a DOM node depth-first, collecting attribute and text dictionaries.

    For the node itself, a dictionary of its attributes is appended to ``objL``
    when it has at least one attribute.  Every child element is then converted
    recursively (appending to the same ``objL``), and the non-empty dictionaries
    returned for those children are appended to ``objL`` as one list.

    Parameters
    ----------
    obj : xml.dom.Node
        The node to convert.
    objL : list
        Accumulator shared by the whole recursion; it is modified in place.

    Returns
    -------
    tuple
        ``(objDict, objL)``.  ``objDict`` is ``{obj.nodeName: text}`` where
        ``text`` is the concatenation of the node's non-blank text children, or
        an empty dict when there is no such text.  ``objL`` is the list that was
        passed in.
    """
    objDict = {}

    # Only element nodes carry an attribute map; other node types (comments,
    # doctypes, ...) report None, which must not be dereferenced.
    attrs = obj.attributes
    if attrs is not None and attrs.length > 0:
        attrDict = {}
        for idx in range(attrs.length):
            attr = attrs.item(idx)
            attrDict[attr.name] = attr.value
        objL.append(attrDict)

    # Text that sits directly under this node, ignoring whitespace-only chunks.
    text = [child.nodeValue for child in obj.childNodes
            if child.nodeType == Node.TEXT_NODE
            and not isAllWhiteSpace(child.nodeValue)]
    if text:
        objDict[obj.nodeName] = "".join(text)

    children = []
    for child in obj.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            # A distinct name is used for the result so that the `obj`
            # parameter is not overwritten while its children are visited.
            childDict, _ = convertXml2Yaml(child, objL)
            if childDict:
                children.append(childDict)
    if children:
        objL.append(children)

    return objDict, objL

In [2]:
def expand_list(obj):
    """Flatten one parameter's entries into a single ``{str: str}`` dictionary.

    Parameters
    ----------
    obj : list
        ``obj[0]`` is a dictionary; every following element is an iterable of
        dictionaries.  The list is read only, never modified.

    Returns
    -------
    dict
        One entry per dictionary found, built from that dictionary's first
        key/value pair, both converted with ``str``.  An empty ``obj`` gives an
        empty dictionary.
    """
    if not obj:
        return {}

    # Work on a new list so the caller's `obj` is left exactly as it was.
    ext_obj = [obj[0]]
    for group in obj[1:]:
        ext_obj.extend(group)

    vals = {}
    for item in ext_obj:
        if not item:
            # nothing to take from an empty dictionary
            continue
        # Only the first pair of each dictionary is used; iterating instead of
        # indexing keys() works on both Python 2 and Python 3.
        key, value = next(iter(item.items()))
        vals[str(key)] = str(value)

    return vals

In [132]:
def _format_parameter(objDict):
    """Arrange one flat parameter dictionary into an ordered parameter entry.

    The recognised keys are removed from ``objDict`` as they are used.  The
    result holds, in this order and only when available: ``key``, ``name``,
    ``description``, then always ``value`` and ``units``.

    Raises
    ------
    AssertionError
        If ``objDict`` still contains keys after all recognised ones were used.
    """
    param = UnsortableOrderedDict()
    vals = UnsortableOrderedDict()
    range_vals = UnsortableOrderedDict()

    if 'name' in objDict:
        # keep only the last '/'-separated component
        param['key'] = objDict.pop('name').rsplit('/')[-1]
    if 'label' in objDict:
        # keep only the text in front of the first ':'
        param['name'] = objDict.pop('label').rsplit(':')[0]

    if 'help_brief' in objDict:
        description, units, bools = check_for_units(objDict.pop('help_brief'))
        param['description'] = description

        if units:
            if 'units' in objDict:
                # the explicit 'units' entry wins; just report the clash
                print('Two sets of units! %s %s' % (units, objDict['units']))
            else:
                objDict['units'] = units

        if bools:
            # choices found in the description override the declared type
            objDict['type'] = 'choice'
            objDict['choices'] = bools

    for field in ('type', 'default'):
        if field in objDict:
            vals[field] = objDict.pop(field)

    for bound in ('min', 'max'):
        if bound in objDict:
            range_vals[bound] = objDict.pop(bound)
    if len(range_vals) > 0:
        vals['range'] = range_vals

    if 'choices' in objDict:
        vals['choices'] = objDict.pop('choices')

    param['value'] = vals

    # the string 'None' (not the None object) marks a parameter without units
    param['units'] = objDict.pop('units', 'None')

    assert (len(objDict) == 0), \
        "items left in the parameter entry definition! %r" % (list(objDict.keys()),)

    return param


def create_yaml_file(inFileName):
    """Parse an xml parameter file and return its parameters as ordered entries.

    Parameters
    ----------
    inFileName : str
        Path of the xml file, handed to ``minidom.parse``.

    Returns
    -------
    list
        One ``UnsortableOrderedDict`` per parameter, in document order.

    Raises
    ------
    AssertionError
        If a parameter carries a field that is not handled here.
    """
    doc = minidom.parse(inFileName)
    # documentElement is the root element even when the file starts with a
    # comment or a doctype, which childNodes[0] would pick up instead.
    root = doc.documentElement
    _, objL = convertXml2Yaml(root, [])

    # remove the structures that are too broad and leave only the ones that relate specifically to parameters
    # An entry is dropped when it has the same length as the entry after it
    # (difference of zero); the last entry is compared with a length of 1.
    flags = np.diff([len(item) for item in objL] + [1])
    objList = [objL[i] for i, j in enumerate(flags) if j != 0]

    # turn all lists in the list to individual dictionaries, clean the formatting
    # Each dictionary starts a new parameter; the lists that follow it, up to
    # the next dictionary, belong to that parameter.
    ind_dict = [i for i, j in enumerate(objList) if isinstance(j, dict)] + [len(objList)]

    allParams = []
    for start, stop in zip(ind_dict[:-1], ind_dict[1:]):
        objDict = expand_list(objList[start:stop])
        allParams.append(_format_parameter(objDict))

    return allParams

In [135]:
def check_for_units(obj):
    ''' checks for units and multiple-choice options in the description

    Parameters
    ----------
    obj : str
        Description text.  Units are looked for between square brackets and
        choices between curly braces.

    Returns
    -------
    tuple
        ``(description, units, choices)``:

        * ``description`` -- the text without the extracted parts, with runs of
          whitespace collapsed to one space and the ends stripped;
        * ``units`` -- the bracketed text, or ``None`` when there is none, when
          it contains whitespace (it is then kept in the description, in
          parentheses) or when it is the word ``none``;
        * ``choices`` -- the braced text split on ``'; '``, or ``None``.
    '''

    ## units
    # NOTE(review): the pattern is greedy, so with several bracketed groups the
    # match runs from the first '[' to the last ']'.
    units = None
    units_s = re.search(r"\[.*\]", obj)

    if units_s:
        # clean up units: drop the enclosing brackets
        units = units_s.group()[1:-1]

        if re.search(r'\s', units):
            # bracketed text with whitespace is not treated as units; it stays
            # in the description, rewritten with parentheses
            obj = obj.replace(units_s.group(), '(' + units + ')')
            units = None
        else:
            obj = obj.replace(units_s.group(), '')

        if units == 'none':
            units = None

    ## boolean / multiple choices
    bools = None
    bool_s = re.search(r"\{.*\}", obj)

    if bool_s:
        # clean up choices: drop the enclosing braces and split the options
        # (str.split replaces string.split, which Python 3 no longer provides)
        bools = bool_s.group()[1:-1].split('; ')
        obj = obj.replace(bool_s.group(), '')

    obj = re.sub(r'\s+', ' ', obj).strip()

    return obj, units, bools

In [136]:
"""
xml_to_yaml_converter.py

Converts xml files to yaml files
For translating TopoFlow metadata

perignon@colorado.edu
June 8, 2015
"""

import yaml
import glob, os, sys
# from xml_to_yaml_aux import create_yaml_file

xml_dir = 'xml' # parent directory for xml files
yaml_dir = 'yaml' # parent directory for new parameter dirs

if not os.path.exists(yaml_dir):
    os.makedirs(yaml_dir)

files = glob.glob(os.path.join(xml_dir, '*.xml')) # xml files

# NOTE(review): only the first xml file found is translated, exactly as before;
# loop over `files` itself to translate all of them.  The slice (instead of
# files[0]) keeps this cell from raising IndexError when no xml file exists.
for f in files[:1]:

    # create directories and files
    inFileName = f
    # Derive names from the path itself rather than from fixed character
    # offsets, so they stay correct if xml_dir is changed.
    baseName = os.path.basename(inFileName)
    print('Translating ' + baseName)

    paramName = os.path.splitext(baseName)[0]
    param_dir = os.path.join(yaml_dir, paramName)

    if not os.path.exists(param_dir):
        os.makedirs(param_dir)

    outFileName = os.path.join(param_dir, 'parameters.yaml')

    # Translate
    allEntries = create_yaml_file(inFileName)

    # Save the file (yaml.dump writes straight to the stream)
    with open(outFileName, 'w') as outfile:
        yaml.dump(allEntries, default_flow_style=False, stream=outfile)


Translating Channels_Diffusive_Wave.xml

In [ ]: