In [1]:
import re
In [2]:
# Regular-expression building blocks for parsing one COBOL data-description
# entry such as "05 NAME PIC 99 OCCURS 4 TIMES INDEXED BY IDX.".
# Each clause pattern starts with its own leading whitespace so the pieces can
# be concatenated directly.

# Wraps a clause pattern in an optional capturing group.
opt_pattern_format = "({})?"

# Level number (1-3 digits) followed by the data name.
row_pattern_base = r'^(?P<level>\d{1,3})\s+(?P<name>\S+)'
# "OCCURS n [TIMES]" or "OCCURS m TO n [TIMES]"; only the last number (n) is
# captured as `occurs`.
row_pattern_occur = r'\s+OCCURS\s+(\d+\s+TO\s+)?(?P<occurs>\d+)(\s+TIMES)?'
row_pattern_indexed_by = r"\s+INDEXED BY\s(?P<indexed_by>\S+)"
row_pattern_redefine = r"\s+REDEFINES\s+(?P<redefines>\S+)"
row_pattern_pic = r'\s+PIC\s+(?P<pic>\S+)'
# Optional "USAGE [IS]" prefix followed by the usage token. The negative
# lookahead stops the token from being one of the clause keywords handled by
# the patterns above; without it, an entry whose PIC clause is followed by
# OCCURS would report usage == 'OCCURS'. The inner (?!\S) makes the exclusion
# apply to whole tokens only.
row_pattern_usage = (
    r'\s+(USAGE\s+)?(IS\s+)?'
    r'(?P<usage>(?!(?:REDEFINES|OCCURS|INDEXED|PIC)(?!\S))\S+)'
)
# Anything left between the recognised clauses and the terminating period.
row_pattern_remainder = r'(?P<remainder>.*)'
row_pattern_end = r'\.$'

# Full-row pattern. The optional clauses are only recognised in this fixed
# order: REDEFINES, OCCURS, INDEXED BY, PIC, USAGE. Clauses that appear in a
# different order end up in `remainder`.
row_pattern = re.compile(
    row_pattern_base
    + opt_pattern_format.format(row_pattern_redefine)
    + opt_pattern_format.format(row_pattern_occur)
    + opt_pattern_format.format(row_pattern_indexed_by)
    + opt_pattern_format.format(row_pattern_pic)
    + opt_pattern_format.format(row_pattern_usage)
    + row_pattern_remainder
    + row_pattern_end
)

# Stand-alone clause patterns, usable with .search() to find a clause anywhere
# in a line regardless of clause order.
row_pattern_redefines = re.compile(row_pattern_redefine)
row_pattern_occurs = re.compile(row_pattern_occur)
In [3]:
# Sample data-description entry used to exercise the patterns; note that its
# PIC clause comes before OCCURS / INDEXED BY.
line = " 05 AMCR-LB-RO-FREQ PIC 99 OCCURS 4 TIMES INDEXED BY X-LB-ROF."
In [5]:
# Parse the stripped sample line and keep only the named groups as a dict.
# Raises AttributeError if the line does not match row_pattern at all.
match = row_pattern.match(line.strip()).groupdict()
match
Out[5]:
In [12]:
# Fallback for entries whose clauses are not in the order row_pattern expects
# (e.g. PIC before OCCURS): look for an OCCURS clause anywhere in the line.
if not match['occurs']:
    occurs_match = row_pattern_occurs.search(line)
    # search() returns None when the text "OCCURS" is present but is not a
    # complete "OCCURS <n>" clause (for instance as part of a data name);
    # in that case leave match['occurs'] untouched instead of failing.
    if occurs_match is not None:
        match['occurs'] = occurs_match.group('occurs')
In [13]:
# Display the parsed fields again after the OCCURS fallback has run.
match
Out[13]:
In [ ]: