In [1]:
import json
import glob
import sexpdata
def extract_parse(p):
    """Convert a parsed S-expression into nested tuples of plain strings.

    Args:
        p: A value as produced by ``sexpdata.loads``: a ``sexpdata.Symbol``,
            a bool, an int, a float, a str, or an iterable of such values.

    Returns:
        A ``str`` for an atom (a symbol's ``value()``, ``str`` of an int,
        lower-cased ``str`` of a bool or float, or a plain string as is),
        otherwise a tuple holding the converted children in order.
    """
    # bool has to be tested before int: bool is a subclass of int, so an
    # int check placed first would capture True/False and emit "True"/"False"
    # instead of the intended lower-case form.
    if isinstance(p, bool):
        return str(p).lower()
    if isinstance(p, int):
        return str(p)
    if isinstance(p, float):
        return str(p).lower()
    if isinstance(p, sexpdata.Symbol):
        return p.value()
    if isinstance(p, str):
        # A plain string is already an atom. Iterating it would recurse
        # without end, because a one-character string yields itself.
        return p
    return tuple(extract_parse(q) for q in p)
def parse_tree(p):
    """Parse one query string into nested tuples of strings.

    A query containing an apostrophe is not parsed as written; the literal
    text "none" is parsed in its place. The result of ``sexpdata.loads`` is
    passed through ``extract_parse`` and returned.
    """
    source = "none" if "'" in p else p
    return extract_parse(sexpdata.loads(source))
def layout_from_parsing(parse):
    """Build a nested module layout from a parsed query.

    Args:
        parse: Either a ``str`` (a leaf token) or a sequence whose first
            item is the head word and whose remaining one or two items are
            sub-parses.

    Returns:
        ``("_Find",)`` for a leaf. Otherwise ``(module, below)`` where
        ``module`` is ``"_Answer"`` for the head ``"is"`` and
        ``"_Transform"`` for ``"above"``, ``"below"``, ``"left_of"`` and
        ``"right_of"``; ``below`` is the layout of the single argument, or
        ``("_And", left, right)`` when there are two arguments.

    Raises:
        ValueError: If the node has no argument or its head is not one of
            the recognised words.
        AssertionError: If the node has more than two arguments.
    """
    if isinstance(parse, str):
        return ("_Find",)

    # A node needs a head plus at least one argument; fail with a clear
    # message instead of an IndexError from parse[0] / parse[1].
    if len(parse) < 2:
        raise ValueError(
            "parse node needs a head and at least one argument: %r" % (parse,))

    head = parse[0]
    if head == "is":
        module = "_Answer"
    elif head in ("above", "below", "left_of", "right_of"):
        module = "_Transform"
    else:
        # Without this branch `module` would stay unassigned and the return
        # statement would fail with an UnboundLocalError.
        raise ValueError(
            "unsupported head %r in parse %r" % (head, parse))

    if len(parse) > 2:  # fuse multiple tokens with "_And"
        assert len(parse) == 3, "at most two arguments are supported"
        below = ("_And",
                 layout_from_parsing(parse[1]),
                 layout_from_parsing(parse[2]))
    else:
        below = layout_from_parsing(parse[1])
    return (module, below)
def flatten_layout(module_layout):
    """Flatten a nested layout into Reverse Polish Notation.

    A string is returned as a one-element list. For any other layout the
    first item is the module name and the remaining items are sub-layouts:
    the sub-layouts are flattened left to right and the module name is
    placed after them (postorder traversal).
    """
    if isinstance(module_layout, str):
        return [module_layout]
    root = module_layout[0]
    tokens = [token
              for child in module_layout[1:]
              for token in flatten_layout(child)]
    tokens.append(root)
    return tokens
In [2]:
# Collect every *.query file in the vqa_shape_dataset directory; the pattern
# is resolved relative to the current working directory.
query_pattern = '../../exp-vqa-shape/vqa_shape_dataset/*.query'
data_files = glob.glob(query_pattern)
In [3]:
# For each query file, turn every line into its RPN module sequence and
# write the per-file list of sequences out as JSON.
for file in data_files:
    layout_symbols = []
    with open(file) as query_handle:
        for query_line in query_handle:
            nested_layout = layout_from_parsing(parse_tree(query_line))
            layout_symbols.append(flatten_layout(nested_layout))
    # Output path: the input path with 'vqa_shape_dataset' replaced by
    # 'data' and a suffix appended.
    save_file = file.replace('vqa_shape_dataset', 'data') + '_layout_symbols.json'
    with open(save_file, 'w') as json_handle:
        json.dump(layout_symbols, json_handle)