In [1]:
import json
import glob
import sexpdata

def extract_parse(p):
    """Convert a parsed S-expression into nested tuples of plain strings.

    Args:
        p: A node of the parsed expression: a ``sexpdata.Symbol``, a
            bool/int/float literal, a plain string, or an iterable of nodes.

    Returns:
        A ``str`` for an atom (a symbol's ``value()``, ``str()`` of a number,
        the lower-cased ``str()`` of a bool or float), or a ``tuple`` holding
        the converted children when ``p`` is an iterable of nodes.
    """
    if isinstance(p, sexpdata.Symbol):
        return p.value()
    # bool must be tested before int: bool is a subclass of int, so an earlier
    # int check would swallow True/False and yield "True"/"False".
    if isinstance(p, (bool, float)):
        return str(p).lower()
    if isinstance(p, int):
        return str(p)
    # A plain string is already an atom. Iterating it would yield 1-character
    # strings that iterate to themselves, i.e. unbounded recursion.
    if isinstance(p, str):
        return p
    return tuple(extract_parse(q) for q in p)

def parse_tree(p):
    """Parse one query string into nested tuples of strings.

    A query containing an apostrophe is not parsed as written; the literal
    query "none" is parsed in its place.

    Args:
        p: The S-expression text of a single query.

    Returns:
        The result of ``extract_parse`` applied to the loaded expression.
    """
    # Substitute the placeholder query whenever an apostrophe is present.
    query_text = "none" if "'" in p else p
    return extract_parse(sexpdata.loads(query_text))

def layout_from_parsing(parse):
    """Map a parsed query (nested tuples of strings) to a module layout.

    A bare string becomes a ``("_Find",)`` leaf. A sequence
    ``(head, arg)`` or ``(head, arg1, arg2)`` becomes ``(module, below)``,
    where ``module`` is ``"_Answer"`` for the head ``"is"`` and
    ``"_Transform"`` for ``"above"``, ``"below"``, ``"left_of"`` or
    ``"right_of"``. With two arguments their layouts are joined under an
    ``"_And"`` node.

    Args:
        parse: A string atom or a sequence whose first item is the head and
            whose remaining one or two items are sub-parses.

    Returns:
        A nested tuple describing the module layout.

    Raises:
        ValueError: If ``parse`` does not have one or two arguments after
            the head, or if the head is not a recognised relation.
    """
    if isinstance(parse, str):
        return ("_Find",)

    # Validate arity up front: the head must be followed by 1 or 2 arguments.
    if len(parse) not in (2, 3):
        raise ValueError(
            "expected a head followed by 1 or 2 arguments, got {!r}".format(parse))

    head = parse[0]
    if head == "is":
        module = "_Answer"
    elif head in ("above", "below", "left_of", "right_of"):
        module = "_Transform"
    else:
        # Previously `module` was left unbound here (UnboundLocalError).
        raise ValueError(
            "unknown head {!r} in parse {!r}".format(head, parse))

    if len(parse) == 3:  # fuse multiple tokens with "_And"
        below = ("_And", layout_from_parsing(parse[1]),
                 layout_from_parsing(parse[2]))
    else:
        below = layout_from_parsing(parse[1])
    return (module, below)

def flatten_layout(module_layout):
    """Flatten a nested module layout into Reverse Polish Notation.

    The layout is walked in postorder: every child is flattened first, in
    order, and the node's own module name is emitted last.

    Args:
        module_layout: Either a module name (``str``) or a sequence whose
            first item is the module name and whose remaining items are
            child layouts.

    Returns:
        A list of module names in postorder.
    """
    if isinstance(module_layout, str):
        return [module_layout]
    head = module_layout[0]
    # Children first (left to right), then the node itself.
    tokens = [token
              for child in module_layout[1:]
              for token in flatten_layout(child)]
    tokens.append(head)
    return tokens

In [2]:
# Glob pattern (relative path) selecting every ``.query`` file of the dataset.
query_glob = '../../exp-vqa-shape/vqa_shape_dataset/*.query'
data_files = glob.glob(query_glob)

In [3]:
# For each query file: turn every line into its RPN layout token list and
# write the resulting list of lists as JSON.
for file in data_files:
    layout_symbols = []
    with open(file) as f:
        for query_line in f:
            layout = layout_from_parsing(parse_tree(query_line))
            layout_symbols.append(flatten_layout(layout))
    # Output path: the input path with 'vqa_shape_dataset' replaced by 'data'
    # and a '_layout_symbols.json' suffix appended.
    save_file = '{}{}'.format(file.replace('vqa_shape_dataset', 'data'),
                              '_layout_symbols.json')
    with open(save_file, 'w') as f:
        json.dump(layout_symbols, f)