In [1]:
import re

In [2]:
# Template that wraps a clause fragment in a group and makes the whole clause optional.
opt_pattern_format = "({})?"

# Fragments for the individual parts of one data-description line.
# Level number (1-3 digits) followed by the data name.
row_pattern_base = r'^(?P<level>\d{1,3})\s+(?P<name>\S+)'
# OCCURS n [TIMES] or OCCURS m TO n [TIMES]; only the upper bound n is captured.
row_pattern_occur = r'\s+OCCURS\s+(\d+\s+TO\s+)?(?P<occurs>\d+)(\s+TIMES)?'
# Any run of whitespace is accepted between INDEXED, BY and the index name,
# matching how every other fragment treats spacing.
row_pattern_indexed_by = r"\s+INDEXED\s+BY\s+(?P<indexed_by>\S+)"
row_pattern_redefine = r"\s+REDEFINES\s+(?P<redefines>\S+)"
row_pattern_pic = r'\s+PIC\s+(?P<pic>\S+)'
# NOTE: both the USAGE and IS keywords are optional here, so this fragment
# captures whatever token follows the preceding clauses, including the keyword
# of a clause that appears out of the expected order.
row_pattern_usage = r'\s+(USAGE\s+)?(IS\s+)?(?P<usage>\S+)'
# Everything that was not consumed by a clause, up to the terminating period.
row_pattern_remainder = r'(?P<remainder>.*)'
row_pattern_end = r'\.$'

# Full-line pattern. Clauses are only recognised in this fixed order:
# REDEFINES, OCCURS, INDEXED BY, PIC, USAGE. A clause that shows up in a
# different position ends up in 'usage' or 'remainder' instead of its own group.
row_pattern = re.compile(row_pattern_base +
                         opt_pattern_format.format(row_pattern_redefine) +
                         opt_pattern_format.format(row_pattern_occur) +
                         opt_pattern_format.format(row_pattern_indexed_by) +
                         opt_pattern_format.format(row_pattern_pic) +
                         opt_pattern_format.format(row_pattern_usage) +
                         row_pattern_remainder +
                         row_pattern_end)

# Stand-alone clause patterns, usable with .search() anywhere in a line.
row_pattern_redefines = re.compile(row_pattern_redefine)

row_pattern_occurs = re.compile(row_pattern_occur)

In [3]:
# Sample data-description line in which PIC comes before OCCURS / INDEXED BY.
line = "      05  AMCR-LB-RO-FREQ PIC 99 OCCURS 4 TIMES INDEXED BY X-LB-ROF."

In [5]:
# Parse the stripped sample line and keep only the dict of named groups.
match = row_pattern.match(line.strip()).groupdict()
match


Out[5]:
{'indexed_by': None,
 'level': '05',
 'name': 'AMCR-LB-RO-FREQ',
 'occurs': None,
 'pic': '99',
 'redefines': None,
 'remainder': ' 4 TIMES INDEXED BY X-LB-ROF',
 'usage': 'OCCURS'}

In [12]:
# Fallback for lines where the full-line pattern did not fill the 'occurs'
# group: look for the OCCURS clause anywhere in the raw line.
if 'OCCURS' in line and not match['occurs']:
    occurs_match = row_pattern_occurs.search(line)
    # The substring 'OCCURS' can be present without a parsable clause
    # (search() then returns None); leave the field untouched in that case.
    if occurs_match is not None:
        match['occurs'] = occurs_match.group('occurs')

In [13]:
# Show the parsed fields again after the OCCURS fallback cell has run.
match


Out[13]:
{'indexed_by': None,
 'level': '05',
 'name': 'AMCR-LB-RO-FREQ',
 'occurs': '4',
 'pic': '99',
 'redefines': None,
 'remainder': ' 4 TIMES INDEXED BY X-LB-ROF',
 'usage': 'OCCURS'}

In [ ]: